[2/6] Support Intel AVX-VNNI-INT8
Checks
Commit Message
From: Kong Lingling <lingling.kong@intel.com>
gcc/ChangeLog
* common/config/i386/cpuinfo.h (get_available_features): Detect
avxvnniint8.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVXVNNIINT8_SET): New.
(OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto.
(ix86_handle_option): Handle -mavxvnniint8.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AVXVNNIINT8.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
avxvnniint8.
* config.gcc: Add avxvnniint8intrin.h.
* config/i386/avxvnniint8intrin.h: New file.
* config/i386/cpuid.h (bit_AVXVNNIINT8): New.
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AVXVNNIINT8__.
* config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8.
(ix86_valid_target_attribute_inner_p): Handle avxvnniint8.
* config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8) New..
* config/i386/i386.opt: Add option -mavxvnniint8.
* config/i386/immintrin.h: Include avxvnniint8intrin.h.
* config/i386/sse.md
(vpdp<vpdotprodtype>_<mode>): New define_insn.
* doc/extend.texi: Document avxvnniint8.
* doc/invoke.texi: Document -mavxvnniint8.
* doc/sourcebuild.texi: Document target avxvnniint8.
gcc/testsuite/ChangeLog
* g++.dg/other/i386-2.C: Add -mavxvnniint8.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-check.h: Add avxvnniint8 check.
* gcc.target/i386/sse-12.c: Add -mavxvnniint8.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* lib/target-supports.exp
(check_effective_target_avxvnniint8): New.
* gcc.target/i386/avxvnniint8-1.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto.
Co-authored-by: Hongyu Wang <hongyu.wang@intel.com>
Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
---
gcc/common/config/i386/cpuinfo.h | 2 +
gcc/common/config/i386/i386-common.cc | 22 ++-
gcc/common/config/i386/i386-cpuinfo.h | 1 +
gcc/common/config/i386/i386-isas.h | 2 +
gcc/config.gcc | 2 +-
gcc/config/i386/avxvnniint8intrin.h | 138 ++++++++++++++++++
gcc/config/i386/cpuid.h | 1 +
gcc/config/i386/i386-builtin.def | 14 ++
gcc/config/i386/i386-c.cc | 2 +
gcc/config/i386/i386-isa.def | 1 +
gcc/config/i386/i386-options.cc | 4 +-
gcc/config/i386/i386.opt | 5 +
gcc/config/i386/immintrin.h | 2 +
gcc/config/i386/sse.md | 31 ++++
gcc/doc/extend.texi | 5 +
gcc/doc/invoke.texi | 9 +-
gcc/doc/sourcebuild.texi | 3 +
gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
gcc/testsuite/gcc.target/i386/avx-check.h | 3 +
gcc/testsuite/gcc.target/i386/avxvnniint8-1.c | 43 ++++++
.../gcc.target/i386/avxvnniint8-vpdpbssd-2.c | 72 +++++++++
.../gcc.target/i386/avxvnniint8-vpdpbssds-2.c | 72 +++++++++
.../gcc.target/i386/avxvnniint8-vpdpbsud-2.c | 72 +++++++++
.../gcc.target/i386/avxvnniint8-vpdpbsuds-2.c | 72 +++++++++
.../gcc.target/i386/avxvnniint8-vpdpbuud-2.c | 72 +++++++++
.../gcc.target/i386/avxvnniint8-vpdpbuuds-2.c | 72 +++++++++
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
gcc/testsuite/gcc.target/i386/sse-12.c | 2 +-
gcc/testsuite/gcc.target/i386/sse-13.c | 2 +-
gcc/testsuite/gcc.target/i386/sse-14.c | 2 +-
gcc/testsuite/gcc.target/i386/sse-22.c | 4 +-
gcc/testsuite/gcc.target/i386/sse-23.c | 2 +-
gcc/testsuite/lib/target-supports.exp | 12 ++
34 files changed, 738 insertions(+), 14 deletions(-)
create mode 100644 gcc/config/i386/avxvnniint8intrin.h
create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
Comments
On Fri, Oct 14, 2022 at 3:57 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> From: Kong Lingling <lingling.kong@intel.com>
>
> gcc/ChangeLog
>
> * common/config/i386/cpuinfo.h (get_available_features): Detect
> avxvnniint8.
> * common/config/i386/i386-common.cc
> (OPTION_MASK_ISA2_AVXVNNIINT8_SET): New.
> (OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto.
> (ix86_handle_option): Handle -mavxvnniint8.
> * common/config/i386/i386-cpuinfo.h (enum processor_features):
> Add FEATURE_AVXVNNIINT8.
> * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
> avxvnniint8.
> * config.gcc: Add avxvnniint8intrin.h.
> * config/i386/avxvnniint8intrin.h: New file.
> * config/i386/cpuid.h (bit_AVXVNNIINT8): New.
> * config/i386/i386-builtin.def: Add new builtins.
> * config/i386/i386-c.cc (ix86_target_macros_internal): Define
> __AVXVNNIINT8__.
> * config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8.
> (ix86_valid_target_attribute_inner_p): Handle avxvnniint8.
> * config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8) New..
> * config/i386/i386.opt: Add option -mavxvnniint8.
> * config/i386/immintrin.h: Include avxvnniint8intrin.h.
> * config/i386/sse.md
> (vpdp<vpdotprodtype>_<mode>): New define_insn.
> * doc/extend.texi: Document avxvnniint8.
> * doc/invoke.texi: Document -mavxvnniint8.
> * doc/sourcebuild.texi: Document target avxvnniint8.
>
> gcc/testsuite/ChangeLog
>
> * g++.dg/other/i386-2.C: Add -mavxvnniint8.
> * g++.dg/other/i386-3.C: Ditto.
> * gcc.target/i386/avx-check.h: Add avxvnniint8 check.
> * gcc.target/i386/sse-12.c: Add -mavxvnniint8.
> * gcc.target/i386/sse-13.c: Ditto.
> * gcc.target/i386/sse-14.c: Ditto.
> * gcc.target/i386/sse-22.c: Ditto.
> * gcc.target/i386/sse-23.c: Ditto.
> * gcc.target/i386/funcspec-56.inc: Add new target attribute.
> * lib/target-supports.exp
> (check_effective_target_avxvnniint8): New.
> * gcc.target/i386/avxvnniint8-1.c: Ditto.
> * gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto.
> * gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto.
> * gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto.
> * gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto.
> * gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto.
> * gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto.
>
> Co-authored-by: Hongyu Wang <hongyu.wang@intel.com>
> Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
> ---
> gcc/common/config/i386/cpuinfo.h | 2 +
> gcc/common/config/i386/i386-common.cc | 22 ++-
> gcc/common/config/i386/i386-cpuinfo.h | 1 +
> gcc/common/config/i386/i386-isas.h | 2 +
> gcc/config.gcc | 2 +-
> gcc/config/i386/avxvnniint8intrin.h | 138 ++++++++++++++++++
> gcc/config/i386/cpuid.h | 1 +
> gcc/config/i386/i386-builtin.def | 14 ++
> gcc/config/i386/i386-c.cc | 2 +
> gcc/config/i386/i386-isa.def | 1 +
> gcc/config/i386/i386-options.cc | 4 +-
> gcc/config/i386/i386.opt | 5 +
> gcc/config/i386/immintrin.h | 2 +
> gcc/config/i386/sse.md | 31 ++++
> gcc/doc/extend.texi | 5 +
> gcc/doc/invoke.texi | 9 +-
> gcc/doc/sourcebuild.texi | 3 +
> gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
> gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
> gcc/testsuite/gcc.target/i386/avx-check.h | 3 +
> gcc/testsuite/gcc.target/i386/avxvnniint8-1.c | 43 ++++++
> .../gcc.target/i386/avxvnniint8-vpdpbssd-2.c | 72 +++++++++
> .../gcc.target/i386/avxvnniint8-vpdpbssds-2.c | 72 +++++++++
> .../gcc.target/i386/avxvnniint8-vpdpbsud-2.c | 72 +++++++++
> .../gcc.target/i386/avxvnniint8-vpdpbsuds-2.c | 72 +++++++++
> .../gcc.target/i386/avxvnniint8-vpdpbuud-2.c | 72 +++++++++
> .../gcc.target/i386/avxvnniint8-vpdpbuuds-2.c | 72 +++++++++
> gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
> gcc/testsuite/gcc.target/i386/sse-12.c | 2 +-
> gcc/testsuite/gcc.target/i386/sse-13.c | 2 +-
> gcc/testsuite/gcc.target/i386/sse-14.c | 2 +-
> gcc/testsuite/gcc.target/i386/sse-22.c | 4 +-
> gcc/testsuite/gcc.target/i386/sse-23.c | 2 +-
> gcc/testsuite/lib/target-supports.exp | 12 ++
> 34 files changed, 738 insertions(+), 14 deletions(-)
> create mode 100644 gcc/config/i386/avxvnniint8intrin.h
> create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
>
> diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
> index 9bb21c6cacc..bed88003f8e 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -795,6 +795,8 @@ get_available_features (struct __processor_model *cpu_model,
> set_feature (FEATURE_AVXVNNI);
> if (eax & bit_AVXIFMA)
> set_feature (FEATURE_AVXIFMA);
> + if (edx & bit_AVXVNNIINT8)
> + set_feature (FEATURE_AVXVNNIINT8);
> }
> if (avx512_usable)
> {
> diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
> index 4de7906b247..6a2a7e3d25a 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see
> #define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
> #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
> #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
> +#define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8
>
> /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
> as -msse4.2. */
> @@ -214,7 +215,7 @@ along with GCC; see the file COPYING3. If not see
> (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
> #define OPTION_MASK_ISA2_AVX2_UNSET \
> (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
> - | OPTION_MASK_ISA2_AVX512F_UNSET)
> + | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
> #define OPTION_MASK_ISA_AVX512F_UNSET \
> (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
> | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
> @@ -278,6 +279,7 @@ along with GCC; see the file COPYING3. If not see
> #define OPTION_MASK_ISA2_KL_UNSET \
> (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
> #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
> +#define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET OPTION_MASK_ISA2_AVXVNNIINT8
>
> /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
> as -mno-sse4.1. */
> @@ -1142,6 +1144,24 @@ ix86_handle_option (struct gcc_options *opts,
> }
> return true;
>
> + case OPT_mavxvnniint8:
> + if (value)
> + {
> + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNIINT8_SET;
> + opts->x_ix86_isa_flags2_explicit |=
> + OPTION_MASK_ISA2_AVXVNNIINT8_SET;
> + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
> + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
> + }
> + else
> + {
> + opts->x_ix86_isa_flags2 &=
> + ~OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
> + opts->x_ix86_isa_flags2_explicit |=
> + OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
> + }
> + return true;
> +
> case OPT_mfma:
> if (value)
> {
> diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
> index 968f9a56a6c..9a6b92fab79 100644
> --- a/gcc/common/config/i386/i386-cpuinfo.h
> +++ b/gcc/common/config/i386/i386-cpuinfo.h
> @@ -241,6 +241,7 @@ enum processor_features
> FEATURE_X86_64_V3,
> FEATURE_X86_64_V4,
> FEATURE_AVXIFMA,
> + FEATURE_AVXVNNIINT8,
> CPU_FEATURE_MAX
> };
>
> diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
> index b05b4bb8f0d..8c1f351056c 100644
> --- a/gcc/common/config/i386/i386-isas.h
> +++ b/gcc/common/config/i386/i386-isas.h
> @@ -176,4 +176,6 @@ ISA_NAMES_TABLE_START
> ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
> ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
> ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma")
> + ISA_NAMES_TABLE_ENTRY("avxvnniint8", FEATURE_AVXVNNIINT8,
> + P_NONE, "-mavxvnniint8")
> ISA_NAMES_TABLE_END
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 12365abbf86..4df78238910 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -422,7 +422,7 @@ i[34567]86-*-* | x86_64-*-*)
> amxbf16intrin.h x86gprintrin.h uintrintrin.h
> hresetintrin.h keylockerintrin.h avxvnniintrin.h
> mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
> - avxifmaintrin.h"
> + avxifmaintrin.h avxvnniint8intrin.h"
> ;;
> ia64-*-*)
> extra_headers=ia64intrin.h
> diff --git a/gcc/config/i386/avxvnniint8intrin.h b/gcc/config/i386/avxvnniint8intrin.h
> new file mode 100644
> index 00000000000..362e6f65c2a
> --- /dev/null
> +++ b/gcc/config/i386/avxvnniint8intrin.h
> @@ -0,0 +1,138 @@
> +/* Copyright (C) 2020 Free Software Foundation, Inc.
> +
> + This file is part of GCC.
> +
> + GCC is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3, or (at your option)
> + any later version.
> +
> + GCC is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + Under Section 7 of GPL version 3, you are granted additional
> + permissions described in the GCC Runtime Library Exception, version
> + 3.1, as published by the Free Software Foundation.
> +
> + You should have received a copy of the GNU General Public License and
> + a copy of the GCC Runtime Library Exception along with this program;
> + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#if !defined _IMMINTRIN_H_INCLUDED
> +#error "Never use <avxvnniint8vlintrin.h> directly; include <immintrin.h> instead."
> +#endif
> +
> +#ifndef _AVXVNNIINT8INTRIN_H_INCLUDED
> +#define _AVXVNNIINT8INTRIN_H_INCLUDED
> +
> +#if !defined(__AVXVNNIINT8__)
> +#pragma GCC push_options
> +#pragma GCC target("avxvnniint8")
> +#define __DISABLE_AVXVNNIINT8__
> +#endif /* __AVXVNNIINT8__ */
> +
> +extern __inline __m128i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_dpbssd_epi32 (__m128i __W, __m128i __A, __m128i __B)
> +{
> + return (__m128i)
> + __builtin_ia32_vpdpbssd128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
> +}
> +
> +extern __inline __m128i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_dpbssds_epi32 (__m128i __W, __m128i __A, __m128i __B)
> +{
> + return (__m128i)
> + __builtin_ia32_vpdpbssds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
> +}
> +
> +extern __inline __m128i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_dpbsud_epi32 (__m128i __W, __m128i __A, __m128i __B)
> +{
> + return (__m128i)
> + __builtin_ia32_vpdpbsud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
> +}
> +
> +extern __inline __m128i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_dpbsuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
> +{
> + return (__m128i)
> + __builtin_ia32_vpdpbsuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
> +}
> +
> +extern __inline __m128i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_dpbuud_epi32 (__m128i __W, __m128i __A, __m128i __B)
> +{
> + return (__m128i)
> + __builtin_ia32_vpdpbuud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
> +}
> +
> +extern __inline __m128i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_dpbuuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
> +{
> + return (__m128i)
> + __builtin_ia32_vpdpbuuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
> +}
> +
> +extern __inline __m256i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_dpbssd_epi32 (__m256i __W, __m256i __A, __m256i __B)
> +{
> + return (__m256i)
> + __builtin_ia32_vpdpbssd256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
> +}
> +
> +extern __inline __m256i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_dpbssds_epi32 (__m256i __W, __m256i __A, __m256i __B)
> +{
> + return (__m256i)
> + __builtin_ia32_vpdpbssds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
> +}
> +
> +extern __inline __m256i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_dpbsud_epi32 (__m256i __W, __m256i __A, __m256i __B)
> +{
> + return (__m256i)
> + __builtin_ia32_vpdpbsud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
> +}
> +
> +extern __inline __m256i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_dpbsuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
> +{
> + return (__m256i)
> + __builtin_ia32_vpdpbsuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
> +}
> +
> +extern __inline __m256i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_dpbuud_epi32 (__m256i __W, __m256i __A, __m256i __B)
> +{
> + return (__m256i)
> + __builtin_ia32_vpdpbuud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
> +}
> +
> +extern __inline __m256i
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_dpbuuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
> +{
> + return (__m256i)
> + __builtin_ia32_vpdpbuuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
> +}
> +
> +#ifdef __DISABLE_AVXVNNIINT8__
> +#undef __DISABLE_AVXVNNIINT8__
> +#pragma GCC pop_options
> +#endif /* __DISABLE_AVXVNNIINT8__ */
> +
> +#endif /* __AVXVNNIINT8INTRIN_H_INCLUDED */
> diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
> index 9885699efd5..f5fad22149a 100644
> --- a/gcc/config/i386/cpuid.h
> +++ b/gcc/config/i386/cpuid.h
> @@ -49,6 +49,7 @@
> #define bit_RDRND (1 << 30)
>
> /* %edx */
> +#define bit_AVXVNNIINT8 (1 << 4)
> #define bit_CMPXCHG8B (1 << 8)
> #define bit_CMOV (1 << 15)
> #define bit_MMX (1 << 23)
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 4a89099a00f..e6edae5728b 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -2696,6 +2696,20 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
> BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
> BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
>
> +/* AVXVNNIINT8 */
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v8si, "__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v8si, "__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v8si, "__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v8si, "__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v8si, "__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v8si, "__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v4si, "__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v4si, "__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v4si, "__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v4si, "__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v4si, "__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
> +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v4si, "__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
> +
> /* VPCLMULQDQ */
> BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
> BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
> diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> index 3494ec035d5..a9a35c0a18a 100644
> --- a/gcc/config/i386/i386-c.cc
> +++ b/gcc/config/i386/i386-c.cc
> @@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
> def_or_undef (parse_in, "__AVXVNNI__");
> if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA)
> def_or_undef (parse_in, "__AVXIFMA__");
> + if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8)
> + def_or_undef (parse_in, "__AVXVNNIINT8__");
> if (TARGET_IAMCU)
> {
> def_or_undef (parse_in, "__iamcu");
> diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
> index 6e0254ce418..c95b917c6ce 100644
> --- a/gcc/config/i386/i386-isa.def
> +++ b/gcc/config/i386/i386-isa.def
> @@ -110,3 +110,4 @@ DEF_PTA(WIDEKL)
> DEF_PTA(AVXVNNI)
> DEF_PTA(AVX512FP16)
> DEF_PTA(AVXIFMA)
> +DEF_PTA(AVXVNNIINT8)
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index 5facb64c2a8..3e6d04433a6 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -227,7 +227,8 @@ static struct ix86_target_opts isa2_opts[] =
> { "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
> { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
> { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
> - { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }
> + { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA },
> + { "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 }
> };
> static struct ix86_target_opts isa_opts[] =
> {
> @@ -1074,6 +1075,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
> IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
> IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
> IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
> + IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
>
> /* enum options */
> IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 36e28b7063d..53d534f6392 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1219,3 +1219,8 @@ mavxifma
> Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save
> Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
> AVXIFMA built-in functions and code generation.
> +
> +mavxvnniint8
> +Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save
> +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and
> +AVXVNNIINT8 built-in functions and code generation.
> diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> index e9d4e975243..ddea249d09b 100644
> --- a/gcc/config/i386/immintrin.h
> +++ b/gcc/config/i386/immintrin.h
> @@ -46,6 +46,8 @@
>
> #include <avxifmaintrin.h>
>
> +#include <avxvnniint8intrin.h>
> +
> #include <avx2intrin.h>
>
> #include <avx512fintrin.h>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 331347569ea..49490a213ea 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -200,6 +200,13 @@
> UNSPEC_COMPLEX_FCMUL
> UNSPEC_COMPLEX_MASK
>
> + ;; For AVX-VNNI-INT8 support
> + UNSPEC_VPDPBSSD
> + UNSPEC_VPDPBSSDS
> + UNSPEC_VPDPBSUD
> + UNSPEC_VPDPBSUDS
> + UNSPEC_VPDPBUUD
> + UNSPEC_VPDPBUUDS
> ])
Can we have unified naming conversion for the UNSPECs and those in
AVX512(AVX)_VNNI, since they have similar semantics.
Maybe change the old UNSPEC_VPMADDUBSWACCD to the "new-style"
UNSPEC_VPDPBUSD? Similar for other UNSPEC_VPMADD***.
>
> (define_c_enum "unspecv" [
> @@ -29241,3 +29248,27 @@
> gcc_unreachable ();
> DONE;
> })
> +
> +(define_int_iterator VPDOTPROD
> + [UNSPEC_VPDPBSSD
> + UNSPEC_VPDPBSSDS
> + UNSPEC_VPDPBSUD
> + UNSPEC_VPDPBSUDS
> + UNSPEC_VPDPBUUD
> + UNSPEC_VPDPBUUDS])
> +
> +(define_int_attr vpdotprodtype
> + [(UNSPEC_VPDPBSSD "bssd") (UNSPEC_VPDPBSSDS "bssds")
> + (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
> + (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
> +
> +(define_insn "vpdp<vpdotprodtype>_<mode>"
> + [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
> + (unspec:VI4_AVX
> + [(match_operand:VI4_AVX 1 "register_operand" "0")
> + (match_operand:VI4_AVX 2 "register_operand" "x")
> + (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")]
> + VPDOTPROD))]
> + "TARGET_AVXVNNIINT8"
> + "vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}"
> + [(set_attr "prefix" "vex")])
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index edecf5c0070..9a8de9fc226 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -7065,6 +7065,11 @@ Enable/disable the generation of the AVXVNNI instructions.
> @cindex @code{target("avxifma")} function attribute, x86
> Enable/disable the generation of the AVXIFMA instructions.
>
> +@item avxvnniint8
> +@itemx no-avxvnniint8
> +@cindex @code{target("avxvnniint8")} function attribute, x86
> +Enable/disable the generation of the AVXVNNIINT8 instructions.
> +
> @item cld
> @itemx no-cld
> @cindex @code{target("cld")} function attribute, x86
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 886fc1d0164..d4ff7549bf3 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options.
> -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
> -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
> -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
> --mavx512fp16 -mavxifma @gol
> +-mavx512fp16 -mavxifma -mavxvnniint8 @gol
> -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
> -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
> -mkl -mwidekl @gol
> @@ -32896,6 +32896,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
> @need 200
> @itemx -mavxifma
> @opindex mavxifma
> +@need 200
> +@itemx -mavxvnniint8
> +@opindex mavxvnniint8
> These switches enable the use of instructions in the MMX, SSE,
> SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
> AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
> @@ -32906,8 +32909,8 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
> GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
> ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
> UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
> -AVXIFMA or CLDEMOTE extended instruction sets. Each has a corresponding
> -@option{-mno-} option to disable use of these instructions.
> +AVXIFMA, AVXVNNIINT8 or CLDEMOTE extended instruction sets. Each has a
> +corresponding @option{-mno-} option to disable use of these instructions.
>
> These extensions are also available as built-in functions: see
> @ref{x86 Built-in Functions}, for details of the functions enabled and
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index 0173acf4a65..e21a1d381e0 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -2493,6 +2493,9 @@ Target supports the execution of @code{avx512vp2intersect} instructions.
> @item avxifma
> Target supports the execution of @code{avxifma} instructions.
>
> +@item avxvnniint8
> +Target supports the execution of @code{avxvnniint8} instructions.
> +
> @item amx_tile
> Target supports the execution of @code{amx-tile} instructions.
>
> diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
> index 5388606779b..ebd01fe47bc 100644
> --- a/gcc/testsuite/g++.dg/other/i386-2.C
> +++ b/gcc/testsuite/g++.dg/other/i386-2.C
> @@ -1,5 +1,5 @@
> /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
> +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
>
> /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
> xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
> index 86cedd3d32f..b66498f1d4c 100644
> --- a/gcc/testsuite/g++.dg/other/i386-3.C
> +++ b/gcc/testsuite/g++.dg/other/i386-3.C
> @@ -1,5 +1,5 @@
> /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
> +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
>
> /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
> xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> diff --git a/gcc/testsuite/gcc.target/i386/avx-check.h b/gcc/testsuite/gcc.target/i386/avx-check.h
> index 24ee6ab4efd..77507ca2edc 100644
> --- a/gcc/testsuite/gcc.target/i386/avx-check.h
> +++ b/gcc/testsuite/gcc.target/i386/avx-check.h
> @@ -25,6 +25,9 @@ main ()
> && avx_os_support ()
> #ifdef AVXIFMA
> && __builtin_cpu_supports ("avxifma")
> +#endif
> +#ifdef AVXVNNIINT8
> + && __builtin_cpu_supports ("avxvnniint8")
> #endif
> )
> {
> diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c b/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> new file mode 100644
> index 00000000000..d6942f34d6e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavxvnniint8 -O2" } */
> +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +
> +
> +#include <immintrin.h>
> +
> +volatile __m256i x,y,z;
> +volatile __m128i x_,y_,z_;
> +volatile __mmask8 m;
> +
> +void extern
> +avxvnniint8_test (void)
> +{
> + x = _mm256_dpbssd_epi32 (x, y, z);
> + x_ = _mm_dpbssd_epi32 (x_, y_, z_);
> +
> + x = _mm256_dpbssds_epi32 (x, y, z);
> + x_ = _mm_dpbssds_epi32 (x_, y_, z_);
> +
> + x = _mm256_dpbsud_epi32 (x, y, z);
> + x_ = _mm_dpbsud_epi32 (x_, y_, z_);
> +
> + x = _mm256_dpbsuds_epi32 (x, y, z);
> + x_ = _mm_dpbsuds_epi32 (x_, y_, z_);
> +
> + x = _mm256_dpbuud_epi32 (x, y, z);
> + x_ = _mm_dpbuud_epi32 (x_, y_, z_);
> +
> + x = _mm256_dpbuuds_epi32 (x, y, z);
> + x_ = _mm_dpbuuds_epi32 (x_, y_, z_);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> new file mode 100644
> index 00000000000..5016de39621
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> @@ -0,0 +1,72 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavxvnniint8" } */
> +/* { dg-require-effective-target avxvnniint8 } */
> +#define AVXVNNIINT8
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +static void
> +CALC (int *r, int *dst, char *s1, char *s2, int size)
> +{
> + short tempres[32];
> + for (int i = 0; i < size; i++) {
> + tempres[i] = (short) s1[i] * (short) s2[i];
> + }
> + for (int i = 0; i < size / 4; i++) {
> + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> + r[i] = test;
> + }
> +}
> +
> +void
> +TEST (void)
> +{
> + int i;
> + union256i_d res_256;
> + union256i_b src2_256;
> + union256i_b src1_256;
> + int res_ref_256[8];
> +
> + for (i = 0; i < 32; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_256.a[i] = 10 + 3 * i + sign;
> + src2_256.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 8; i++)
> + res_256.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> + res_256.x = _mm256_dpbssd_epi32 (res_256.x, src1_256.x, src2_256.x);
> + if (check_union256i_d (res_256, res_ref_256))
> + abort ();
> +
> + union128i_d res_128;
> + union128i_b src2_128;
> + union128i_b src1_128;
> + int res_ref_128[4];
> +
> + for (i = 0; i < 16; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_128.a[i] = 10 + 3 * i * i + sign;
> + src2_128.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 4; i++)
> + res_128.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> + res_128.x = _mm_dpbssd_epi32 (res_128.x, src1_128.x, src2_128.x);
> + if (check_union128i_d (res_128, res_ref_128))
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> new file mode 100644
> index 00000000000..6de5062e917
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> @@ -0,0 +1,72 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavxvnniint8" } */
> +/* { dg-require-effective-target avxvnniint8 } */
> +#define AVXVNNIINT8
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +static void
> +CALC (int *r, int *dst, char *s1, char *s2, int size)
> +{
> + short tempres[32];
> + for (int i = 0; i < size; i++) {
> + tempres[i] = (short) s1[i] * (short) s2[i];
> + }
> + for (int i = 0; i < size / 4; i++) {
> + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
> + }
> +}
> +
> +void
> +TEST (void)
> +{
> + int i;
> + union256i_d res_256;
> + union256i_b src2_256;
> + union256i_b src1_256;
> + int res_ref_256[8];
> +
> + for (i = 0; i < 32; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_256.a[i] = 10 + 3 * i + sign;
> + src2_256.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 8; i++)
> + res_256.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> + res_256.x = _mm256_dpbssds_epi32 (res_256.x, src1_256.x, src2_256.x);
> + if (check_union256i_d (res_256, res_ref_256))
> + abort ();
> +
> + union128i_d res_128;
> + union128i_b src2_128;
> + union128i_b src1_128;
> + int res_ref_128[4];
> +
> + for (i = 0; i < 16; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_128.a[i] = 10 + 3 * i * i + sign;
> + src2_128.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 4; i++)
> + res_128.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> + res_128.x = _mm_dpbssds_epi32 (res_128.x, src1_128.x, src2_128.x);
> + if (check_union128i_d (res_128, res_ref_128))
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> new file mode 100644
> index 00000000000..6e4ffd1c7be
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> @@ -0,0 +1,72 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavxvnniint8" } */
> +/* { dg-require-effective-target avxvnniint8 } */
> +#define AVXVNNIINT8
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +static void
> +CALC (int *r, int *dst, char *s1, unsigned char *s2, int size)
> +{
> + short tempres[32];
> + for (int i = 0; i < size; i++) {
> + tempres[i] = (short) s1[i] * (unsigned short) s2[i];
> + }
> + for (int i = 0; i < size / 4; i++) {
> + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> + r[i] = test;
> + }
> +}
> +
> +void
> +TEST (void)
> +{
> + int i;
> + union256i_d res_256;
> + union256i_b src1_256;
> + union256i_ub src2_256;
> + int res_ref_256[8];
> +
> + for (i = 0; i < 32; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_256.a[i] = 10 + 3 * i + sign;
> + src2_256.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 8; i++)
> + res_256.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> + res_256.x = _mm256_dpbsud_epi32 (res_256.x, src1_256.x, src2_256.x);
> + if (check_union256i_d (res_256, res_ref_256))
> + abort ();
> +
> + union128i_d res_128;
> + union128i_b src1_128;
> + union128i_ub src2_128;
> + int res_ref_128[4];
> +
> + for (i = 0; i < 16; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_128.a[i] = 10 + 3 * i * i + sign;
> + src2_128.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 4; i++)
> + res_128.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> + res_128.x = _mm_dpbsud_epi32 (res_128.x, src1_128.x, src2_128.x);
> + if (check_union128i_d (res_128, res_ref_128))
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> new file mode 100644
> index 00000000000..ad4b6047ecd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> @@ -0,0 +1,72 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavxvnniint8" } */
> +/* { dg-require-effective-target avxvnniint8 } */
> +#define AVXVNNIINT8
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +static void
> +CALC (int *r, int *dst, char *s1, unsigned char *s2, int size)
> +{
> + short tempres[32];
> + for (int i = 0; i < size; i++) {
> + tempres[i] = (short) s1[i] * (unsigned short) s2[i];
> + }
> + for (int i = 0; i < size / 4; i++) {
> + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
> + }
> +}
> +
> +void
> +TEST (void)
> +{
> + int i;
> + union256i_d res_256;
> + union256i_b src1_256;
> + union256i_ub src2_256;
> + int res_ref_256[8];
> +
> + for (i = 0; i < 32; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_256.a[i] = 10 + 3 * i + sign;
> + src2_256.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 8; i++)
> + res_256.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> + res_256.x = _mm256_dpbsuds_epi32 (res_256.x, src1_256.x, src2_256.x);
> + if (check_union256i_d (res_256, res_ref_256))
> + abort ();
> +
> + union128i_d res_128;
> + union128i_b src1_128;
> + union128i_ub src2_128;
> + int res_ref_128[4];
> +
> + for (i = 0; i < 16; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_128.a[i] = 10 + 3 * i * i + sign;
> + src2_128.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 4; i++)
> + res_128.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> + res_128.x = _mm_dpbsuds_epi32 (res_128.x, src1_128.x, src2_128.x);
> + if (check_union128i_d (res_128, res_ref_128))
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> new file mode 100644
> index 00000000000..6590915a459
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> @@ -0,0 +1,72 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavxvnniint8" } */
> +/* { dg-require-effective-target avxvnniint8 } */
> +#define AVXVNNIINT8
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +static void
> +CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned char *s2, int size)
> +{
> + unsigned short tempres[32];
> + for (int i = 0; i < size; i++) {
> + tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
> + }
> + for (int i = 0; i < size / 4; i++) {
> + unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> + r[i] = test;
> + }
> +}
> +
> +void
> +TEST (void)
> +{
> + int i;
> + union256i_ud res_256;
> + union256i_ub src2_256;
> + union256i_ub src1_256;
> + unsigned int res_ref_256[8];
> +
> + for (i = 0; i < 32; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_256.a[i] = 10 + 3 * i + sign;
> + src2_256.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 8; i++)
> + res_256.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> + res_256.x = _mm256_dpbuud_epi32 (res_256.x, src1_256.x, src2_256.x);
> + if (check_union256i_ud (res_256, res_ref_256))
> + abort ();
> +
> + union128i_ud res_128;
> + union128i_ub src2_128;
> + union128i_ub src1_128;
> + unsigned int res_ref_128[4];
> +
> + for (i = 0; i < 16; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_128.a[i] = 10 + 3 * i * i + sign;
> + src2_128.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 4; i++)
> + res_128.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> + res_128.x = _mm_dpbuud_epi32 (res_128.x, src1_128.x, src2_128.x);
> + if (check_union128i_ud (res_128, res_ref_128))
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> new file mode 100644
> index 00000000000..970e4a5d408
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> @@ -0,0 +1,72 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavxvnniint8" } */
> +/* { dg-require-effective-target avxvnniint8 } */
> +#define AVXVNNIINT8
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +static void
> +CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned char *s2, int size)
> +{
> + unsigned short tempres[32];
> + for (int i = 0; i < size; i++) {
> + tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
> + }
> + for (int i = 0; i < size / 4; i++) {
> + unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> + r[i] = test > 0xFFFFFFFF ? 0xFFFFFFFF : test;
> + }
> +}
> +
> +void
> +TEST (void)
> +{
> + int i;
> + union256i_ud res_256;
> + union256i_ub src2_256;
> + union256i_ub src1_256;
> + unsigned int res_ref_256[8];
> +
> + for (i = 0; i < 32; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_256.a[i] = 10 + 3 * i + sign;
> + src2_256.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 8; i++)
> + res_256.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> + res_256.x = _mm256_dpbuuds_epi32 (res_256.x, src1_256.x, src2_256.x);
> + if (check_union256i_ud (res_256, res_ref_256))
> + abort ();
> +
> + union128i_ud res_128;
> + union128i_ub src2_128;
> + union128i_ub src1_128;
> + unsigned int res_ref_128[4];
> +
> + for (i = 0; i < 16; i++)
> + {
> + int sign = i % 2 ? 1 : -1;
> + src1_128.a[i] = 10 + 3 * i * i + sign;
> + src2_128.a[i] = sign * 10 * i * i;
> + }
> +
> + for (i = 0; i < 4; i++)
> + res_128.a[i] = 0x7fffffff;
> +
> + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> + res_128.x = _mm_dpbuuds_epi32 (res_128.x, src1_128.x, src2_128.x);
> + if (check_union128i_ud (res_128, res_ref_128))
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> index 466555c0d06..a681bffe3e7 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> @@ -81,6 +81,7 @@ extern void test_widekl (void) __attribute__((__target__("widekl")));
> extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
> extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
> extern void test_avxifma (void) __attribute__((__target__("avxifma")));
> +extern void test_avxvnniint8 (void) __attribute__((__target__("avxvnniint8")));
>
> extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
> extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
> @@ -163,6 +164,7 @@ extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
> extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
> extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
> extern void test_no_avxifma (void) __attribute__((__target__("no-avxifma")));
> +extern void test_no_avxvnniint8 (void) __attribute__((__target__("no-avxvnniint8")));
>
> extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
> extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
> diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
> index fde56261d8f..ddde2df6657 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-12.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-12.c
> @@ -3,7 +3,7 @@
> popcntintrin.h gfniintrin.h and mm_malloc.h are usable
> with -O -std=c89 -pedantic-errors. */
> /* { dg-do compile } */
> -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */
> +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8" } */
>
> #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
> index bb29555babe..2b293216c6f 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
> +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
> /* { dg-add-options bind_pic_locally } */
>
> #include <mm_malloc.h>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
> index f2701ddaaf9..78b51048b90 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-14.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-14.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
> +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
> /* { dg-add-options bind_pic_locally } */
>
> #include <mm_malloc.h>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
> index 3d196975b1e..cc1c8cfa4be 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-22.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-22.c
> @@ -103,7 +103,7 @@
>
>
> #ifndef DIFFERENT_PRAGMAS
> -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
> +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> #endif
>
> /* Following intrinsics require immediate arguments. They
> @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
>
> /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
> #ifdef DIFFERENT_PRAGMAS
> -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
> +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> #endif
> #include <immintrin.h>
> test_1 (_cvtss_sh, unsigned short, float, 1)
> diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
> index d3a233f90fc..270f4483491 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> @@ -843,6 +843,6 @@
> #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
> #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
>
> -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
> +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
>
> #include <x86intrin.h>
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 69de3b96bfc..64ccfc746bd 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -9518,6 +9518,18 @@ proc check_effective_target_avxifma { } {
> } "-O0 -mavxifma" ]
> }
>
> +# Return 1 if avxvnniint8 instructions can be compiled.
> +proc check_effective_target_avxvnniint8 { } {
> + return [check_no_compiler_messages avxvnniint8 object {
> + typedef int __v8si __attribute__ ((__vector_size__ (32)));
> + __v8si
> + _mm256_dpbssd_epi32 (__v8si __A, __v8si __B, __v8si __C)
> + {
> + return __builtin_ia32_vpdpbssd256 (__A, __B, __C);
> + }
> + } "-O0 -mavxvnniint8" ]
> +}
> +
> # Return 1 if sse instructions can be compiled.
> proc check_effective_target_sse { } {
> return [check_no_compiler_messages sse object {
> --
> 2.18.1
>
> -----Original Message-----
> From: Hongtao Liu <crazylht@gmail.com>
> Sent: Monday, October 17, 2022 12:05 PM
> To: Jiang, Haochen <haochen.jiang@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>
> Subject: Re: [PATCH 2/6] Support Intel AVX-VNNI-INT8
>
> On Fri, Oct 14, 2022 at 3:57 PM Haochen Jiang via Gcc-patches <gcc-
> patches@gcc.gnu.org> wrote:
> >
> > From: Kong Lingling <lingling.kong@intel.com>
> >
> > gcc/ChangeLog
> >
> > * common/config/i386/cpuinfo.h (get_available_features): Detect
> > avxvnniint8.
> > * common/config/i386/i386-common.cc
> > (OPTION_MASK_ISA2_AVXVNNIINT8_SET): New.
> > (OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto.
> > (ix86_handle_option): Handle -mavxvnniint8.
> > * common/config/i386/i386-cpuinfo.h (enum processor_features):
> > Add FEATURE_AVXVNNIINT8.
> > * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
> > avxvnniint8.
> > * config.gcc: Add avxvnniint8intrin.h.
> > * config/i386/avxvnniint8intrin.h: New file.
> > * config/i386/cpuid.h (bit_AVXVNNIINT8): New.
> > * config/i386/i386-builtin.def: Add new builtins.
> > * config/i386/i386-c.cc (ix86_target_macros_internal): Define
> > __AVXVNNIINT8__.
> > * config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8.
> > (ix86_valid_target_attribute_inner_p): Handle avxvnniint8.
> > * config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8) New..
> > * config/i386/i386.opt: Add option -mavxvnniint8.
> > * config/i386/immintrin.h: Include avxvnniint8intrin.h.
> > * config/i386/sse.md
> > (vpdp<vpdotprodtype>_<mode>): New define_insn.
> > * doc/extend.texi: Document avxvnniint8.
> > * doc/invoke.texi: Document -mavxvnniint8.
> > * doc/sourcebuild.texi: Document target avxvnniint8.
> >
> > gcc/testsuite/ChangeLog
> >
> > * g++.dg/other/i386-2.C: Add -mavxvnniint8.
> > * g++.dg/other/i386-3.C: Ditto.
> > * gcc.target/i386/avx-check.h: Add avxvnniint8 check.
> > * gcc.target/i386/sse-12.c: Add -mavxvnniint8.
> > * gcc.target/i386/sse-13.c: Ditto.
> > * gcc.target/i386/sse-14.c: Ditto.
> > * gcc.target/i386/sse-22.c: Ditto.
> > * gcc.target/i386/sse-23.c: Ditto.
> > * gcc.target/i386/funcspec-56.inc: Add new target attribute.
> > * lib/target-supports.exp
> > (check_effective_target_avxvnniint8): New.
> > * gcc.target/i386/avxvnniint8-1.c: Ditto.
> > * gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto.
> > * gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto.
> > * gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto.
> > * gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto.
> > * gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto.
> > * gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto.
> >
> > Co-authored-by: Hongyu Wang <hongyu.wang@intel.com>
> > Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
> > ---
> > gcc/common/config/i386/cpuinfo.h | 2 +
> > gcc/common/config/i386/i386-common.cc | 22 ++-
> > gcc/common/config/i386/i386-cpuinfo.h | 1 +
> > gcc/common/config/i386/i386-isas.h | 2 +
> > gcc/config.gcc | 2 +-
> > gcc/config/i386/avxvnniint8intrin.h | 138 ++++++++++++++++++
> > gcc/config/i386/cpuid.h | 1 +
> > gcc/config/i386/i386-builtin.def | 14 ++
> > gcc/config/i386/i386-c.cc | 2 +
> > gcc/config/i386/i386-isa.def | 1 +
> > gcc/config/i386/i386-options.cc | 4 +-
> > gcc/config/i386/i386.opt | 5 +
> > gcc/config/i386/immintrin.h | 2 +
> > gcc/config/i386/sse.md | 31 ++++
> > gcc/doc/extend.texi | 5 +
> > gcc/doc/invoke.texi | 9 +-
> > gcc/doc/sourcebuild.texi | 3 +
> > gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
> > gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
> > gcc/testsuite/gcc.target/i386/avx-check.h | 3 +
> > gcc/testsuite/gcc.target/i386/avxvnniint8-1.c | 43 ++++++
> > .../gcc.target/i386/avxvnniint8-vpdpbssd-2.c | 72 +++++++++
> > .../gcc.target/i386/avxvnniint8-vpdpbssds-2.c | 72 +++++++++
> > .../gcc.target/i386/avxvnniint8-vpdpbsud-2.c | 72 +++++++++
> > .../gcc.target/i386/avxvnniint8-vpdpbsuds-2.c | 72 +++++++++
> > .../gcc.target/i386/avxvnniint8-vpdpbuud-2.c | 72 +++++++++
> > .../gcc.target/i386/avxvnniint8-vpdpbuuds-2.c | 72 +++++++++
> > gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
> > gcc/testsuite/gcc.target/i386/sse-12.c | 2 +-
> > gcc/testsuite/gcc.target/i386/sse-13.c | 2 +-
> > gcc/testsuite/gcc.target/i386/sse-14.c | 2 +-
> > gcc/testsuite/gcc.target/i386/sse-22.c | 4 +-
> > gcc/testsuite/gcc.target/i386/sse-23.c | 2 +-
> > gcc/testsuite/lib/target-supports.exp | 12 ++
> > 34 files changed, 738 insertions(+), 14 deletions(-) create mode
> > 100644 gcc/config/i386/avxvnniint8intrin.h
> > create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> > create mode 100644
> > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> > create mode 100644
> > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> > create mode 100644
> > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> > create mode 100644
> > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> > create mode 100644
> > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> > create mode 100644
> > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> >
> > diff --git a/gcc/common/config/i386/cpuinfo.h
> > b/gcc/common/config/i386/cpuinfo.h
> > index 9bb21c6cacc..bed88003f8e 100644
> > --- a/gcc/common/config/i386/cpuinfo.h
> > +++ b/gcc/common/config/i386/cpuinfo.h
> > @@ -795,6 +795,8 @@ get_available_features (struct __processor_model
> *cpu_model,
> > set_feature (FEATURE_AVXVNNI);
> > if (eax & bit_AVXIFMA)
> > set_feature (FEATURE_AVXIFMA);
> > + if (edx & bit_AVXVNNIINT8)
> > + set_feature (FEATURE_AVXVNNIINT8);
> > }
> > if (avx512_usable)
> > {
> > diff --git a/gcc/common/config/i386/i386-common.cc
> > b/gcc/common/config/i386/i386-common.cc
> > index 4de7906b247..6a2a7e3d25a 100644
> > --- a/gcc/common/config/i386/i386-common.cc
> > +++ b/gcc/common/config/i386/i386-common.cc
> > @@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see
> > #define OPTION_MASK_ISA2_AMX_TILE_SET
> OPTION_MASK_ISA2_AMX_TILE
> > #define OPTION_MASK_ISA2_AMX_INT8_SET
> OPTION_MASK_ISA2_AMX_INT8
> > #define OPTION_MASK_ISA2_AMX_BF16_SET
> OPTION_MASK_ISA2_AMX_BF16
> > +#define OPTION_MASK_ISA2_AVXVNNIINT8_SET
> OPTION_MASK_ISA2_AVXVNNIINT8
> >
> > /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
> > as -msse4.2. */
> > @@ -214,7 +215,7 @@ along with GCC; see the file COPYING3. If not see
> > (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
> #define
> > OPTION_MASK_ISA2_AVX2_UNSET \
> > (OPTION_MASK_ISA2_AVXIFMA_UNSET |
> OPTION_MASK_ISA2_AVXVNNI_UNSET \
> > - | OPTION_MASK_ISA2_AVX512F_UNSET)
> > + | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET |
> > + OPTION_MASK_ISA2_AVX512F_UNSET)
> > #define OPTION_MASK_ISA_AVX512F_UNSET \
> > (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET
> \
> > | OPTION_MASK_ISA_AVX512PF_UNSET |
> OPTION_MASK_ISA_AVX512ER_UNSET
> > \ @@ -278,6 +279,7 @@ along with GCC; see the file COPYING3. If not
> > see #define OPTION_MASK_ISA2_KL_UNSET \
> > (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
> #define
> > OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
> > +#define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET
> > +OPTION_MASK_ISA2_AVXVNNIINT8
> >
> > /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
> > as -mno-sse4.1. */
> > @@ -1142,6 +1144,24 @@ ix86_handle_option (struct gcc_options *opts,
> > }
> > return true;
> >
> > + case OPT_mavxvnniint8:
> > + if (value)
> > + {
> > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNIINT8_SET;
> > + opts->x_ix86_isa_flags2_explicit |=
> > + OPTION_MASK_ISA2_AVXVNNIINT8_SET;
> > + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
> > + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
> > + }
> > + else
> > + {
> > + opts->x_ix86_isa_flags2 &=
> > + ~OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
> > + opts->x_ix86_isa_flags2_explicit |=
> > + OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
> > + }
> > + return true;
> > +
> > case OPT_mfma:
> > if (value)
> > {
> > diff --git a/gcc/common/config/i386/i386-cpuinfo.h
> > b/gcc/common/config/i386/i386-cpuinfo.h
> > index 968f9a56a6c..9a6b92fab79 100644
> > --- a/gcc/common/config/i386/i386-cpuinfo.h
> > +++ b/gcc/common/config/i386/i386-cpuinfo.h
> > @@ -241,6 +241,7 @@ enum processor_features
> > FEATURE_X86_64_V3,
> > FEATURE_X86_64_V4,
> > FEATURE_AVXIFMA,
> > + FEATURE_AVXVNNIINT8,
> > CPU_FEATURE_MAX
> > };
> >
> > diff --git a/gcc/common/config/i386/i386-isas.h
> > b/gcc/common/config/i386/i386-isas.h
> > index b05b4bb8f0d..8c1f351056c 100644
> > --- a/gcc/common/config/i386/i386-isas.h
> > +++ b/gcc/common/config/i386/i386-isas.h
> > @@ -176,4 +176,6 @@ ISA_NAMES_TABLE_START
> > ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3,
> P_X86_64_V3, NULL)
> > ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4,
> P_X86_64_V4, NULL)
> > ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE,
> > "-mavxifma")
> > + ISA_NAMES_TABLE_ENTRY("avxvnniint8", FEATURE_AVXVNNIINT8,
> > + P_NONE, "-mavxvnniint8")
> > ISA_NAMES_TABLE_END
> > diff --git a/gcc/config.gcc b/gcc/config.gcc index
> > 12365abbf86..4df78238910 100644
> > --- a/gcc/config.gcc
> > +++ b/gcc/config.gcc
> > @@ -422,7 +422,7 @@ i[34567]86-*-* | x86_64-*-*)
> > amxbf16intrin.h x86gprintrin.h uintrintrin.h
> > hresetintrin.h keylockerintrin.h avxvnniintrin.h
> > mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
> > - avxifmaintrin.h"
> > + avxifmaintrin.h avxvnniint8intrin.h"
> > ;;
> > ia64-*-*)
> > extra_headers=ia64intrin.h
> > diff --git a/gcc/config/i386/avxvnniint8intrin.h
> > b/gcc/config/i386/avxvnniint8intrin.h
> > new file mode 100644
> > index 00000000000..362e6f65c2a
> > --- /dev/null
> > +++ b/gcc/config/i386/avxvnniint8intrin.h
> > @@ -0,0 +1,138 @@
> > +/* Copyright (C) 2020 Free Software Foundation, Inc.
> > +
> > + This file is part of GCC.
> > +
> > + GCC is free software; you can redistribute it and/or modify
> > + it under the terms of the GNU General Public License as published by
> > + the Free Software Foundation; either version 3, or (at your option)
> > + any later version.
> > +
> > + GCC is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + GNU General Public License for more details.
> > +
> > + Under Section 7 of GPL version 3, you are granted additional
> > + permissions described in the GCC Runtime Library Exception, version
> > + 3.1, as published by the Free Software Foundation.
> > +
> > + You should have received a copy of the GNU General Public License and
> > + a copy of the GCC Runtime Library Exception along with this program;
> > + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> > + <http://www.gnu.org/licenses/>. */
> > +
> > +#if !defined _IMMINTRIN_H_INCLUDED
> > +#error "Never use <avxvnniint8vlintrin.h> directly; include <immintrin.h>
> instead."
> > +#endif
> > +
> > +#ifndef _AVXVNNIINT8INTRIN_H_INCLUDED #define
> > +_AVXVNNIINT8INTRIN_H_INCLUDED
> > +
> > +#if !defined(__AVXVNNIINT8__)
> > +#pragma GCC push_options
> > +#pragma GCC target("avxvnniint8")
> > +#define __DISABLE_AVXVNNIINT8__
> > +#endif /* __AVXVNNIINT8__ */
> > +
> > +extern __inline __m128i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_dpbssd_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > + return (__m128i)
> > + __builtin_ia32_vpdpbssd128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > +__B); }
> > +
> > +extern __inline __m128i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_dpbssds_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > + return (__m128i)
> > + __builtin_ia32_vpdpbssds128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > +__B); }
> > +
> > +extern __inline __m128i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_dpbsud_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > + return (__m128i)
> > + __builtin_ia32_vpdpbsud128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > +__B); }
> > +
> > +extern __inline __m128i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_dpbsuds_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > + return (__m128i)
> > + __builtin_ia32_vpdpbsuds128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > +__B); }
> > +
> > +extern __inline __m128i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_dpbuud_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > + return (__m128i)
> > + __builtin_ia32_vpdpbuud128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > +__B); }
> > +
> > +extern __inline __m128i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_dpbuuds_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > + return (__m128i)
> > + __builtin_ia32_vpdpbuuds128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > +__B); }
> > +
> > +extern __inline __m256i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm256_dpbssd_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > + return (__m256i)
> > + __builtin_ia32_vpdpbssd256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > +__B); }
> > +
> > +extern __inline __m256i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm256_dpbssds_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > + return (__m256i)
> > + __builtin_ia32_vpdpbssds256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > +__B); }
> > +
> > +extern __inline __m256i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm256_dpbsud_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > + return (__m256i)
> > + __builtin_ia32_vpdpbsud256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > +__B); }
> > +
> > +extern __inline __m256i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm256_dpbsuds_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > + return (__m256i)
> > + __builtin_ia32_vpdpbsuds256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > +__B); }
> > +
> > +extern __inline __m256i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm256_dpbuud_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > + return (__m256i)
> > + __builtin_ia32_vpdpbuud256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > +__B); }
> > +
> > +extern __inline __m256i
> > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm256_dpbuuds_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > + return (__m256i)
> > + __builtin_ia32_vpdpbuuds256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > +__B); }
> > +
> > +#ifdef __DISABLE_AVXVNNIINT8__
> > +#undef __DISABLE_AVXVNNIINT8__
> > +#pragma GCC pop_options
> > +#endif /* __DISABLE_AVXVNNIINT8__ */
> > +
> > +#endif /* __AVXVNNIINT8INTRIN_H_INCLUDED */
> > diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index
> > 9885699efd5..f5fad22149a 100644
> > --- a/gcc/config/i386/cpuid.h
> > +++ b/gcc/config/i386/cpuid.h
> > @@ -49,6 +49,7 @@
> > #define bit_RDRND (1 << 30)
> >
> > /* %edx */
> > +#define bit_AVXVNNIINT8 (1 << 4)
> > #define bit_CMPXCHG8B (1 << 8)
> > #define bit_CMOV (1 << 15)
> > #define bit_MMX (1 << 23)
> > diff --git a/gcc/config/i386/i386-builtin.def
> > b/gcc/config/i386/i386-builtin.def
> > index 4a89099a00f..e6edae5728b 100644
> > --- a/gcc/config/i386/i386-builtin.def
> > +++ b/gcc/config/i386/i386-builtin.def
> > @@ -2696,6 +2696,20 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI |
> > OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC
> > (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_vpdpwssds_v4si_mask,
> "__builtin_ia32_vpdpwssds_v4si_mask",
> > IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int)
> > V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC
> (OPTION_MASK_ISA_AVX512VNNI |
> > OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz,
> > "__builtin_ia32_vpdpwssds_v4si_maskz",
> > IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int)
> > V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
> >
> > +/* AVXVNNIINT8 */
> > +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8,
> CODE_FOR_vpdpbssd_v8si,
> > +"__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI,
> UNKNOWN,
> > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v8si,
> > +"__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI,
> UNKNOWN,
> > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v8si,
> > +"__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI,
> UNKNOWN,
> > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v8si,
> > +"__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI,
> UNKNOWN,
> > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v8si,
> > +"__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI,
> UNKNOWN,
> > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v8si,
> > +"__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI,
> UNKNOWN,
> > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v4si,
> > +"__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI,
> UNKNOWN,
> > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v4si,
> > +"__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI,
> UNKNOWN,
> > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v4si,
> > +"__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI,
> UNKNOWN,
> > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v4si,
> > +"__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI,
> UNKNOWN,
> > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v4si,
> > +"__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI,
> UNKNOWN,
> > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v4si,
> > +"__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI,
> UNKNOWN,
> > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI)
> > +
> > /* VPCLMULQDQ */
> > BDESC (OPTION_MASK_ISA_VPCLMULQDQ |
> OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di",
> > IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int)
> V2DI_FTYPE_V2DI_V2DI_INT)
> > BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0,
> > CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di",
> > IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int)
> V4DI_FTYPE_V4DI_V4DI_INT)
> > diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> > index 3494ec035d5..a9a35c0a18a 100644
> > --- a/gcc/config/i386/i386-c.cc
> > +++ b/gcc/config/i386/i386-c.cc
> > @@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT
> isa_flag,
> > def_or_undef (parse_in, "__AVXVNNI__");
> > if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA)
> > def_or_undef (parse_in, "__AVXIFMA__");
> > + if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8)
> > + def_or_undef (parse_in, "__AVXVNNIINT8__");
> > if (TARGET_IAMCU)
> > {
> > def_or_undef (parse_in, "__iamcu"); diff --git
> > a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index
> > 6e0254ce418..c95b917c6ce 100644
> > --- a/gcc/config/i386/i386-isa.def
> > +++ b/gcc/config/i386/i386-isa.def
> > @@ -110,3 +110,4 @@ DEF_PTA(WIDEKL)
> > DEF_PTA(AVXVNNI)
> > DEF_PTA(AVX512FP16)
> > DEF_PTA(AVXIFMA)
> > +DEF_PTA(AVXVNNIINT8)
> > diff --git a/gcc/config/i386/i386-options.cc
> > b/gcc/config/i386/i386-options.cc index 5facb64c2a8..3e6d04433a6
> > 100644
> > --- a/gcc/config/i386/i386-options.cc
> > +++ b/gcc/config/i386/i386-options.cc
> > @@ -227,7 +227,8 @@ static struct ix86_target_opts isa2_opts[] =
> > { "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
> > { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
> > { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
> > - { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }
> > + { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA },
> > + { "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 }
> > };
> > static struct ix86_target_opts isa_opts[] = { @@ -1074,6 +1075,7 @@
> > ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char
> *p_strings[],
> > IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
> > IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
> > IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
> > + IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
> >
> > /* enum options */
> > IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git
> > a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index
> > 36e28b7063d..53d534f6392 100644
> > --- a/gcc/config/i386/i386.opt
> > +++ b/gcc/config/i386/i386.opt
> > @@ -1219,3 +1219,8 @@ mavxifma
> > Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save Support MMX,
> > SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and AVXIFMA
> > built-in functions and code generation.
> > +
> > +mavxvnniint8
> > +Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save Support
> MMX,
> > +SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and
> > +AVXVNNIINT8 built-in functions and code generation.
> > diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> > index e9d4e975243..ddea249d09b 100644
> > --- a/gcc/config/i386/immintrin.h
> > +++ b/gcc/config/i386/immintrin.h
> > @@ -46,6 +46,8 @@
> >
> > #include <avxifmaintrin.h>
> >
> > +#include <avxvnniint8intrin.h>
> > +
> > #include <avx2intrin.h>
> >
> > #include <avx512fintrin.h>
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index
> > 331347569ea..49490a213ea 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -200,6 +200,13 @@
> > UNSPEC_COMPLEX_FCMUL
> > UNSPEC_COMPLEX_MASK
> >
> > + ;; For AVX-VNNI-INT8 support
> > + UNSPEC_VPDPBSSD
> > + UNSPEC_VPDPBSSDS
> > + UNSPEC_VPDPBSUD
> > + UNSPEC_VPDPBSUDS
> > + UNSPEC_VPDPBUUD
> > + UNSPEC_VPDPBUUDS
> > ])
> Can we have unified naming conversion for the UNSPECs and those in
> AVX512(AVX)_VNNI, since they have similar semantics.
> Maybe change the old UNSPEC_VPMADDUBSWACCD to the "new-style"
> UNSPEC_VPDPBUSD? Similar for other UNSPEC_VPMADD***.
I suppose we can do that in this patch to avoid confusion.
> >
> > (define_c_enum "unspecv" [
> > @@ -29241,3 +29248,27 @@
> > gcc_unreachable ();
> > DONE;
> > })
> > +
> > +(define_int_iterator VPDOTPROD
> > + [UNSPEC_VPDPBSSD
> > + UNSPEC_VPDPBSSDS
> > + UNSPEC_VPDPBSUD
> > + UNSPEC_VPDPBSUDS
> > + UNSPEC_VPDPBUUD
> > + UNSPEC_VPDPBUUDS])
> > +
> > +(define_int_attr vpdotprodtype
> > + [(UNSPEC_VPDPBSSD "bssd") (UNSPEC_VPDPBSSDS "bssds")
> > + (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
> > + (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
> > +
> > +(define_insn "vpdp<vpdotprodtype>_<mode>"
> > + [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
> > + (unspec:VI4_AVX
> > + [(match_operand:VI4_AVX 1 "register_operand" "0")
> > + (match_operand:VI4_AVX 2 "register_operand" "x")
> > + (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")]
> > + VPDOTPROD))]
> > + "TARGET_AVXVNNIINT8"
> > + "vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}"
> > + [(set_attr "prefix" "vex")])
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index
> > edecf5c0070..9a8de9fc226 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -7065,6 +7065,11 @@ Enable/disable the generation of the AVXVNNI
> instructions.
> > @cindex @code{target("avxifma")} function attribute, x86
> > Enable/disable the generation of the AVXIFMA instructions.
> >
> > +@item avxvnniint8
> > +@itemx no-avxvnniint8
> > +@cindex @code{target("avxvnniint8")} function attribute, x86
> > +Enable/disable the generation of the AVXVNNIINT8 instructions.
> > +
> > @item cld
> > @itemx no-cld
> > @cindex @code{target("cld")} function attribute, x86 diff --git
> > a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index
> > 886fc1d0164..d4ff7549bf3 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options.
> > -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid
> @gol
> > -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
> > -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
> > --mavx512fp16 -mavxifma @gol
> > +-mavx512fp16 -mavxifma -mavxvnniint8 @gol
> > -mcldemote -mms-bitfields -mno-align-stringops
> > -minline-all-stringops @gol -minline-stringops-dynamically
> > -mstringop-strategy=@var{alg} @gol -mkl -mwidekl @gol @@ -32896,6
> > +32896,9 @@ preferred alignment to @option{-mpreferred-stack-
> boundary=2}.
> > @need 200
> > @itemx -mavxifma
> > @opindex mavxifma
> > +@need 200
> > +@itemx -mavxvnniint8
> > +@opindex mavxvnniint8
> > These switches enable the use of instructions in the MMX, SSE, SSE2,
> > SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F,
> > AVX512PF, AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ,
> > AVX512IFMA, AVX512VBMI, SHA, @@ -32906,8 +32909,8 @@ XSAVEOPT,
> XSAVEC,
> > XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI,
> VAES,
> > WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B,
> AVX512BF16,
> > ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI,
> AVX5124VNNIW,
> > SERIALIZE, UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL,
> > AVXVNNI, AVX512FP16, -AVXIFMA or CLDEMOTE extended instruction sets.
> Each has a corresponding -@option{-mno-} option to disable use of these
> instructions.
> > +AVXIFMA, AVXVNNIINT8 or CLDEMOTE extended instruction sets. Each
> has
> > +a corresponding @option{-mno-} option to disable use of these
> instructions.
> >
> > These extensions are also available as built-in functions: see
> > @ref{x86 Built-in Functions}, for details of the functions enabled
> > and diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> > index 0173acf4a65..e21a1d381e0 100644
> > --- a/gcc/doc/sourcebuild.texi
> > +++ b/gcc/doc/sourcebuild.texi
> > @@ -2493,6 +2493,9 @@ Target supports the execution of
> @code{avx512vp2intersect} instructions.
> > @item avxifma
> > Target supports the execution of @code{avxifma} instructions.
> >
> > +@item avxvnniint8
> > +Target supports the execution of @code{avxvnniint8} instructions.
> > +
> > @item amx_tile
> > Target supports the execution of @code{amx-tile} instructions.
> >
> > diff --git a/gcc/testsuite/g++.dg/other/i386-2.C
> > b/gcc/testsuite/g++.dg/other/i386-2.C
> > index 5388606779b..ebd01fe47bc 100644
> > --- a/gcc/testsuite/g++.dg/other/i386-2.C
> > +++ b/gcc/testsuite/g++.dg/other/i386-2.C
> > @@ -1,5 +1,5 @@
> > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> > -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx
> > -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -
> mbmi2
> > -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -
> mprfchw
> > -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf
> > -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq
> > -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2
> > -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx
> > -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig
> > -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize
> > -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni
> > -mavx512fp16 -mavxifma" } */
> > +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx
> > +-mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi
> > +-mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -
> mrdseed
> > +-mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd
> > +-mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt
> > +-mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi
> > +-mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -
> mclwb
> > +-mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -
> mpconfig
> > +-mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize
> > +-mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni
> > +-mavx512fp16 -mavxifma -mavxvnniint8" } */
> >
> > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
> > xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> > diff --git a/gcc/testsuite/g++.dg/other/i386-3.C
> > b/gcc/testsuite/g++.dg/other/i386-3.C
> > index 86cedd3d32f..b66498f1d4c 100644
> > --- a/gcc/testsuite/g++.dg/other/i386-3.C
> > +++ b/gcc/testsuite/g++.dg/other/i386-3.C
> > @@ -1,5 +1,5 @@
> > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> > -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow
> > -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -
> mbmi
> > -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed
> > -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd
> > -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt
> > -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi
> > -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -
> mclwb
> > -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig
> > -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize
> > -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni
> > -mavx512fp16 -mavxifma" } */
> > +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow
> > +-mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -
> mbmi
> > +-mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -
> mrdseed
> > +-mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd
> > +-mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt
> > +-mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi
> > +-mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -
> mclwb
> > +-mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -
> mpconfig
> > +-mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize
> > +-mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni
> > +-mavx512fp16 -mavxifma -mavxvnniint8" } */
> >
> > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
> > xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> > diff --git a/gcc/testsuite/gcc.target/i386/avx-check.h
> > b/gcc/testsuite/gcc.target/i386/avx-check.h
> > index 24ee6ab4efd..77507ca2edc 100644
> > --- a/gcc/testsuite/gcc.target/i386/avx-check.h
> > +++ b/gcc/testsuite/gcc.target/i386/avx-check.h
> > @@ -25,6 +25,9 @@ main ()
> > && avx_os_support ()
> > #ifdef AVXIFMA
> > && __builtin_cpu_supports ("avxifma")
> > +#endif
> > +#ifdef AVXVNNIINT8
> > + && __builtin_cpu_supports ("avxvnniint8")
> > #endif
> > )
> > {
> > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> > b/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> > new file mode 100644
> > index 00000000000..d6942f34d6e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> > @@ -0,0 +1,43 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-mavxvnniint8 -O2" } */
> > +/* { dg-final { scan-assembler-times "vpdpbssd\[
> > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> 9\]+\[^\n\r]*%ymm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbssd\[
> > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> 9\]+\[^\n\r]*%xmm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbssds\[
> > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> 9\]+\[^\n\r]*%ymm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbssds\[
> > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> 9\]+\[^\n\r]*%xmm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbsud\[
> > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> 9\]+\[^\n\r]*%ymm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbsud\[
> > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> 9\]+\[^\n\r]*%xmm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbsuds\[
> > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> 9\]+\[^\n\r]*%ymm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbsuds\[
> > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> 9\]+\[^\n\r]*%xmm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbuud\[
> > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> 9\]+\[^\n\r]*%ymm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbuud\[
> > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> 9\]+\[^\n\r]*%xmm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbuuds\[
> > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> 9\]+\[^\n\r]*%ymm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpdpbuuds\[
> > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> 9\]+\[^\n\r]*%xmm\[0-9\]
> > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > +
> > +
> > +#include <immintrin.h>
> > +
> > +volatile __m256i x,y,z;
> > +volatile __m128i x_,y_,z_;
> > +volatile __mmask8 m;
> > +
> > +void extern
> > +avxvnniint8_test (void)
> > +{
> > + x = _mm256_dpbssd_epi32 (x, y, z);
> > + x_ = _mm_dpbssd_epi32 (x_, y_, z_);
> > +
> > + x = _mm256_dpbssds_epi32 (x, y, z); x_ = _mm_dpbssds_epi32 (x_,
> > + y_, z_);
> > +
> > + x = _mm256_dpbsud_epi32 (x, y, z);
> > + x_ = _mm_dpbsud_epi32 (x_, y_, z_);
> > +
> > + x = _mm256_dpbsuds_epi32 (x, y, z); x_ = _mm_dpbsuds_epi32 (x_,
> > + y_, z_);
> > +
> > + x = _mm256_dpbuud_epi32 (x, y, z);
> > + x_ = _mm_dpbuud_epi32 (x_, y_, z_);
> > +
> > + x = _mm256_dpbuuds_epi32 (x, y, z);
> > + x_ = _mm_dpbuuds_epi32 (x_, y_, z_); }
> > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> > new file mode 100644
> > index 00000000000..5016de39621
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> > @@ -0,0 +1,72 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavxvnniint8" } */
> > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > +#ifndef CHECK #define CHECK "avx-check.h"
> > +#endif
> > +
> > +#ifndef TEST
> > +#define TEST avx_test
> > +#endif
> > +
> > +#include CHECK
> > +
> > +static void
> > +CALC (int *r, int *dst, char *s1, char *s2, int size) {
> > + short tempres[32];
> > + for (int i = 0; i < size; i++) {
> > + tempres[i] = (short) s1[i] * (short) s2[i];
> > + }
> > + for (int i = 0; i < size / 4; i++) {
> > + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > + r[i] = test;
> > + }
> > +}
> > +
> > +void
> > +TEST (void)
> > +{
> > + int i;
> > + union256i_d res_256;
> > + union256i_b src2_256;
> > + union256i_b src1_256;
> > + int res_ref_256[8];
> > +
> > + for (i = 0; i < 32; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_256.a[i] = 10 + 3 * i + sign;
> > + src2_256.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 8; i++)
> > + res_256.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > + res_256.x = _mm256_dpbssd_epi32 (res_256.x, src1_256.x, src2_256.x);
> > + if (check_union256i_d (res_256, res_ref_256))
> > + abort ();
> > +
> > + union128i_d res_128;
> > + union128i_b src2_128;
> > + union128i_b src1_128;
> > + int res_ref_128[4];
> > +
> > + for (i = 0; i < 16; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > + src2_128.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 4; i++)
> > + res_128.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > + res_128.x = _mm_dpbssd_epi32 (res_128.x, src1_128.x, src2_128.x);
> > + if (check_union128i_d (res_128, res_ref_128))
> > + abort ();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> > new file mode 100644
> > index 00000000000..6de5062e917
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> > @@ -0,0 +1,72 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavxvnniint8" } */
> > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > +#ifndef CHECK #define CHECK "avx-check.h"
> > +#endif
> > +
> > +#ifndef TEST
> > +#define TEST avx_test
> > +#endif
> > +
> > +#include CHECK
> > +
> > +static void
> > +CALC (int *r, int *dst, char *s1, char *s2, int size) {
> > + short tempres[32];
> > + for (int i = 0; i < size; i++) {
> > + tempres[i] = (short) s1[i] * (short) s2[i];
> > + }
> > + for (int i = 0; i < size / 4; i++) {
> > + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
> > + }
> > +}
> > +
> > +void
> > +TEST (void)
> > +{
> > + int i;
> > + union256i_d res_256;
> > + union256i_b src2_256;
> > + union256i_b src1_256;
> > + int res_ref_256[8];
> > +
> > + for (i = 0; i < 32; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_256.a[i] = 10 + 3 * i + sign;
> > + src2_256.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 8; i++)
> > + res_256.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > + res_256.x = _mm256_dpbssds_epi32 (res_256.x, src1_256.x,
> > + src2_256.x); if (check_union256i_d (res_256, res_ref_256))
> > + abort ();
> > +
> > + union128i_d res_128;
> > + union128i_b src2_128;
> > + union128i_b src1_128;
> > + int res_ref_128[4];
> > +
> > + for (i = 0; i < 16; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > + src2_128.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 4; i++)
> > + res_128.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > + res_128.x = _mm_dpbssds_epi32 (res_128.x, src1_128.x, src2_128.x);
> > + if (check_union128i_d (res_128, res_ref_128))
> > + abort ();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> > new file mode 100644
> > index 00000000000..6e4ffd1c7be
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> > @@ -0,0 +1,72 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavxvnniint8" } */
> > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > +#ifndef CHECK #define CHECK "avx-check.h"
> > +#endif
> > +
> > +#ifndef TEST
> > +#define TEST avx_test
> > +#endif
> > +
> > +#include CHECK
> > +
> > +static void
> > +CALC (int *r, int *dst, char *s1, unsigned char *s2, int size) {
> > + short tempres[32];
> > + for (int i = 0; i < size; i++) {
> > + tempres[i] = (short) s1[i] * (unsigned short) s2[i];
> > + }
> > + for (int i = 0; i < size / 4; i++) {
> > + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > + r[i] = test;
> > + }
> > +}
> > +
> > +void
> > +TEST (void)
> > +{
> > + int i;
> > + union256i_d res_256;
> > + union256i_b src1_256;
> > + union256i_ub src2_256;
> > + int res_ref_256[8];
> > +
> > + for (i = 0; i < 32; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_256.a[i] = 10 + 3 * i + sign;
> > + src2_256.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 8; i++)
> > + res_256.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > + res_256.x = _mm256_dpbsud_epi32 (res_256.x, src1_256.x, src2_256.x);
> > + if (check_union256i_d (res_256, res_ref_256))
> > + abort ();
> > +
> > + union128i_d res_128;
> > + union128i_b src1_128;
> > + union128i_ub src2_128;
> > + int res_ref_128[4];
> > +
> > + for (i = 0; i < 16; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > + src2_128.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 4; i++)
> > + res_128.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > + res_128.x = _mm_dpbsud_epi32 (res_128.x, src1_128.x, src2_128.x);
> > + if (check_union128i_d (res_128, res_ref_128))
> > + abort ();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> > new file mode 100644
> > index 00000000000..ad4b6047ecd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> > @@ -0,0 +1,72 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavxvnniint8" } */
> > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > +#ifndef CHECK #define CHECK "avx-check.h"
> > +#endif
> > +
> > +#ifndef TEST
> > +#define TEST avx_test
> > +#endif
> > +
> > +#include CHECK
> > +
> > +static void
> > +CALC (int *r, int *dst, char *s1, unsigned char *s2, int size) {
> > + short tempres[32];
> > + for (int i = 0; i < size; i++) {
> > + tempres[i] = (short) s1[i] * (unsigned short) s2[i];
> > + }
> > + for (int i = 0; i < size / 4; i++) {
> > + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
> > + }
> > +}
> > +
> > +void
> > +TEST (void)
> > +{
> > + int i;
> > + union256i_d res_256;
> > + union256i_b src1_256;
> > + union256i_ub src2_256;
> > + int res_ref_256[8];
> > +
> > + for (i = 0; i < 32; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_256.a[i] = 10 + 3 * i + sign;
> > + src2_256.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 8; i++)
> > + res_256.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > + res_256.x = _mm256_dpbsuds_epi32 (res_256.x, src1_256.x,
> > + src2_256.x); if (check_union256i_d (res_256, res_ref_256))
> > + abort ();
> > +
> > + union128i_d res_128;
> > + union128i_b src1_128;
> > + union128i_ub src2_128;
> > + int res_ref_128[4];
> > +
> > + for (i = 0; i < 16; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > + src2_128.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 4; i++)
> > + res_128.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > + res_128.x = _mm_dpbsuds_epi32 (res_128.x, src1_128.x, src2_128.x);
> > + if (check_union128i_d (res_128, res_ref_128))
> > + abort ();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> > new file mode 100644
> > index 00000000000..6590915a459
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> > @@ -0,0 +1,72 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavxvnniint8" } */
> > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > +#ifndef CHECK #define CHECK "avx-check.h"
> > +#endif
> > +
> > +#ifndef TEST
> > +#define TEST avx_test
> > +#endif
> > +
> > +#include CHECK
> > +
> > +static void
> > +CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned
> > +char *s2, int size) {
> > + unsigned short tempres[32];
> > + for (int i = 0; i < size; i++) {
> > + tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
> > + }
> > + for (int i = 0; i < size / 4; i++) {
> > + unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4
> + 1]
> > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > + r[i] = test;
> > + }
> > +}
> > +
> > +void
> > +TEST (void)
> > +{
> > + int i;
> > + union256i_ud res_256;
> > + union256i_ub src2_256;
> > + union256i_ub src1_256;
> > + unsigned int res_ref_256[8];
> > +
> > + for (i = 0; i < 32; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_256.a[i] = 10 + 3 * i + sign;
> > + src2_256.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 8; i++)
> > + res_256.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > + res_256.x = _mm256_dpbuud_epi32 (res_256.x, src1_256.x, src2_256.x);
> > + if (check_union256i_ud (res_256, res_ref_256))
> > + abort ();
> > +
> > + union128i_ud res_128;
> > + union128i_ub src2_128;
> > + union128i_ub src1_128;
> > + unsigned int res_ref_128[4];
> > +
> > + for (i = 0; i < 16; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > + src2_128.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 4; i++)
> > + res_128.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > + res_128.x = _mm_dpbuud_epi32 (res_128.x, src1_128.x, src2_128.x);
> > + if (check_union128i_ud (res_128, res_ref_128))
> > + abort ();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> > new file mode 100644
> > index 00000000000..970e4a5d408
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> > @@ -0,0 +1,72 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavxvnniint8" } */
> > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > +#ifndef CHECK #define CHECK "avx-check.h"
> > +#endif
> > +
> > +#ifndef TEST
> > +#define TEST avx_test
> > +#endif
> > +
> > +#include CHECK
> > +
> > +static void
> > +CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned
> > +char *s2, int size) {
> > + unsigned short tempres[32];
> > + for (int i = 0; i < size; i++) {
> > + tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
> > + }
> > + for (int i = 0; i < size / 4; i++) {
> > + unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4
> + 1]
> > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > + r[i] = test > 0xFFFFFFFF ? 0xFFFFFFFF : test;
> > + }
> > +}
> > +
> > +void
> > +TEST (void)
> > +{
> > + int i;
> > + union256i_ud res_256;
> > + union256i_ub src2_256;
> > + union256i_ub src1_256;
> > + unsigned int res_ref_256[8];
> > +
> > + for (i = 0; i < 32; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_256.a[i] = 10 + 3 * i + sign;
> > + src2_256.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 8; i++)
> > + res_256.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > + res_256.x = _mm256_dpbuuds_epi32 (res_256.x, src1_256.x,
> > + src2_256.x); if (check_union256i_ud (res_256, res_ref_256))
> > + abort ();
> > +
> > + union128i_ud res_128;
> > + union128i_ub src2_128;
> > + union128i_ub src1_128;
> > + unsigned int res_ref_128[4];
> > +
> > + for (i = 0; i < 16; i++)
> > + {
> > + int sign = i % 2 ? 1 : -1;
> > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > + src2_128.a[i] = sign * 10 * i * i;
> > + }
> > +
> > + for (i = 0; i < 4; i++)
> > + res_128.a[i] = 0x7fffffff;
> > +
> > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > + res_128.x = _mm_dpbuuds_epi32 (res_128.x, src1_128.x, src2_128.x);
> > + if (check_union128i_ud (res_128, res_ref_128))
> > + abort ();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> > b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> > index 466555c0d06..a681bffe3e7 100644
> > --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> > +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> > @@ -81,6 +81,7 @@ extern void test_widekl (void)
> __attribute__((__target__("widekl")));
> > extern void test_avxvnni (void)
> __attribute__((__target__("avxvnni")));
> > extern void test_avx512fp16 (void)
> __attribute__((__target__("avx512fp16")));
> > extern void test_avxifma (void)
> __attribute__((__target__("avxifma")));
> > +extern void test_avxvnniint8 (void)
> __attribute__((__target__("avxvnniint8")));
> >
> > extern void test_no_sgx (void) __attribute__((__target__("no-
> sgx")));
> > extern void test_no_avx5124fmaps(void)
> __attribute__((__target__("no-avx5124fmaps")));
> > @@ -163,6 +164,7 @@ extern void test_no_widekl (void)
> __attribute__((__target__("no-widekl")));
> > extern void test_no_avxvnni (void) __attribute__((__target__("no-
> avxvnni")));
> > extern void test_no_avx512fp16 (void)
> __attribute__((__target__("no-avx512fp16")));
> > extern void test_no_avxifma (void) __attribute__((__target__("no-
> avxifma")));
> > +extern void test_no_avxvnniint8 (void)
> __attribute__((__target__("no-avxvnniint8")));
> >
> > extern void test_arch_nocona (void)
> __attribute__((__target__("arch=nocona")));
> > extern void test_arch_core2 (void)
> __attribute__((__target__("arch=core2")));
> > diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c
> > b/gcc/testsuite/gcc.target/i386/sse-12.c
> > index fde56261d8f..ddde2df6657 100644
> > --- a/gcc/testsuite/gcc.target/i386/sse-12.c
> > +++ b/gcc/testsuite/gcc.target/i386/sse-12.c
> > @@ -3,7 +3,7 @@
> > popcntintrin.h gfniintrin.h and mm_malloc.h are usable
> > with -O -std=c89 -pedantic-errors. */
> > /* { dg-do compile } */
> > -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a
> > -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm
> > -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -
> mrtm
> > -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er
> > -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves
> > -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi
> > -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw
> > -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -
> mgfni
> > -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd
> > -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8
> > -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */
> > +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a
> > +-m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -
> mabm
> > +-mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma
> > +-mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -
> mavx512er
> > +-mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves
> > +-mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi
> > +-mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw
> > +-mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -
> mgfni
> > +-mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd
> > +-mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8
> > +-mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8" } */
> >
> > #include <x86intrin.h>
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c
> > b/gcc/testsuite/gcc.target/i386/sse-13.c
> > index bb29555babe..2b293216c6f 100644
> > --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> > +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> > @@ -1,5 +1,5 @@
> > /* { dg-do compile } */
> > -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8
> > -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -
> mpopcnt
> > -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -
> mfma
> > -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -
> mavx512er
> > -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves
> > -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi
> > -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw
> > -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -
> mpku
> > -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -
> mavx512bf16
> > -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl
> > -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
> > +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8
> > +-msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -
> mpopcnt
> > +-mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c
> > +-mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f
> > +-mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec
> > +-mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -
> mavx512vbmi
> > +-mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw
> > +-mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -
> mpku
> > +-msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -
> mavx512bf16
> > +-menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -
> mkl
> > +-mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
> > /* { dg-add-options bind_pic_locally } */
> >
> > #include <mm_malloc.h>
> > diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c
> > b/gcc/testsuite/gcc.target/i386/sse-14.c
> > index f2701ddaaf9..78b51048b90 100644
> > --- a/gcc/testsuite/gcc.target/i386/sse-14.c
> > +++ b/gcc/testsuite/gcc.target/i386/sse-14.c
> > @@ -1,5 +1,5 @@
> > /* { dg-do compile } */
> > -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8
> > -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -
> mpopcnt
> > -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -
> mfma
> > -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -
> mavx512er
> > -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves
> > -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma
> > -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw
> > -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -
> mgfni
> > -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd
> > -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8
> > -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
> > +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8
> > +-msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -
> mpopcnt
> > +-mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c
> > +-mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f
> > +-mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec
> > +-mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -
> mavx512ifma
> > +-mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw
> > +-mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -
> mgfni
> > +-mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd
> > +-mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8
> > +-mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma
> > +-mavxvnniint8" } */
> > /* { dg-add-options bind_pic_locally } */
> >
> > #include <mm_malloc.h>
> > diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c
> > b/gcc/testsuite/gcc.target/i386/sse-22.c
> > index 3d196975b1e..cc1c8cfa4be 100644
> > --- a/gcc/testsuite/gcc.target/i386/sse-22.c
> > +++ b/gcc/testsuite/gcc.target/i386/sse-22.c
> > @@ -103,7 +103,7 @@
> >
> >
> > #ifndef DIFFERENT_PRAGMAS
> > -#pragma GCC target
> > ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,t
> > bm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f
> > ,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512d
> q
> > ,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512
> vpo
> > pcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxld
> > trk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
> > +#pragma GCC target
> > +("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,
> > +tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx51
> >
> +2f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx51
> >
> +2dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx
> 51
> > +2vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,
> > +tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxi
> > +fma,avxvnniint8")
> > #endif
> >
> > /* Following intrinsics require immediate arguments. They @@ -220,7
> > +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
> >
> > /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA)
> */
> > #ifdef DIFFERENT_PRAGMAS -#pragma GCC target
> > ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,
> >
> sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5
> 1
> >
> 24fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx5
> > 12vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl
> > ,avxvnni,avx512fp16,avxifma")
> > +#pragma GCC target
> > +("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf
> >
> +,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,av
> x
> >
> +5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,a
> > +vx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,wi
> > +dekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> > #endif
> > #include <immintrin.h>
> > test_1 (_cvtss_sh, unsigned short, float, 1) diff --git
> > a/gcc/testsuite/gcc.target/i386/sse-23.c
> > b/gcc/testsuite/gcc.target/i386/sse-23.c
> > index d3a233f90fc..270f4483491 100644
> > --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> > +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> > @@ -843,6 +843,6 @@
> > #define __builtin_ia32_vpclmulqdq_v2di(A, B, C)
> > __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define
> > __builtin_ia32_vpclmulqdq_v8di(A, B, C)
> > __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
> >
> > -#pragma GCC target
> > ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,t
> > bm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx
> > 512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflusho
> >
> pt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5
> 1
> > 24vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vb
> >
> mi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512v
> > p2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,av
> > xvnni,avx512fp16,avxifma")
> > +#pragma GCC target
> > +("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,
> > +tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,a
> > +vx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflu
> >
> +shopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,
> a
> > +vx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx
> >
> +512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,
> a
> > +vx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,wi
> > +dekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> >
> > #include <x86intrin.h>
> > diff --git a/gcc/testsuite/lib/target-supports.exp
> > b/gcc/testsuite/lib/target-supports.exp
> > index 69de3b96bfc..64ccfc746bd 100644
> > --- a/gcc/testsuite/lib/target-supports.exp
> > +++ b/gcc/testsuite/lib/target-supports.exp
> > @@ -9518,6 +9518,18 @@ proc check_effective_target_avxifma { } {
> > } "-O0 -mavxifma" ]
> > }
> >
> > +# Return 1 if avxvnniint8 instructions can be compiled.
> > +proc check_effective_target_avxvnniint8 { } {
> > + return [check_no_compiler_messages avxvnniint8 object {
> > + typedef int __v8si __attribute__ ((__vector_size__ (32)));
> > + __v8si
> > + _mm256_dpbssd_epi32 (__v8si __A, __v8si __B, __v8si __C)
> > + {
> > + return __builtin_ia32_vpdpbssd256 (__A, __B, __C);
> > + }
> > + } "-O0 -mavxvnniint8" ]
> > +}
> > +
> > # Return 1 if sse instructions can be compiled.
> > proc check_effective_target_sse { } {
> > return [check_no_compiler_messages sse object {
> > --
> > 2.18.1
> >
>
>
> --
> BR,
> Hongtao
On Mon, Oct 17, 2022 at 2:27 PM Jiang, Haochen <haochen.jiang@intel.com> wrote:
>
> > -----Original Message-----
> > From: Hongtao Liu <crazylht@gmail.com>
> > Sent: Monday, October 17, 2022 12:05 PM
> > To: Jiang, Haochen <haochen.jiang@intel.com>
> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>
> > Subject: Re: [PATCH 2/6] Support Intel AVX-VNNI-INT8
> >
> > On Fri, Oct 14, 2022 at 3:57 PM Haochen Jiang via Gcc-patches <gcc-
> > patches@gcc.gnu.org> wrote:
> > >
> > > From: Kong Lingling <lingling.kong@intel.com>
> > >
> > > gcc/ChangeLog
> > >
> > > * common/config/i386/cpuinfo.h (get_available_features): Detect
> > > avxvnniint8.
> > > * common/config/i386/i386-common.cc
> > > (OPTION_MASK_ISA2_AVXVNNIINT8_SET): New.
> > > (OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto.
> > > (ix86_handle_option): Handle -mavxvnniint8.
> > > * common/config/i386/i386-cpuinfo.h (enum processor_features):
> > > Add FEATURE_AVXVNNIINT8.
> > > * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
> > > avxvnniint8.
> > > * config.gcc: Add avxvnniint8intrin.h.
> > > * config/i386/avxvnniint8intrin.h: New file.
> > > * config/i386/cpuid.h (bit_AVXVNNIINT8): New.
> > > * config/i386/i386-builtin.def: Add new builtins.
> > > * config/i386/i386-c.cc (ix86_target_macros_internal): Define
> > > __AVXVNNIINT8__.
> > > * config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8.
> > > (ix86_valid_target_attribute_inner_p): Handle avxvnniint8.
> > > * config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8) New..
> > > * config/i386/i386.opt: Add option -mavxvnniint8.
> > > * config/i386/immintrin.h: Include avxvnniint8intrin.h.
> > > * config/i386/sse.md
> > > (vpdp<vpdotprodtype>_<mode>): New define_insn.
> > > * doc/extend.texi: Document avxvnniint8.
> > > * doc/invoke.texi: Document -mavxvnniint8.
> > > * doc/sourcebuild.texi: Document target avxvnniint8.
> > >
> > > gcc/testsuite/ChangeLog
> > >
> > > * g++.dg/other/i386-2.C: Add -mavxvnniint8.
> > > * g++.dg/other/i386-3.C: Ditto.
> > > * gcc.target/i386/avx-check.h: Add avxvnniint8 check.
> > > * gcc.target/i386/sse-12.c: Add -mavxvnniint8.
> > > * gcc.target/i386/sse-13.c: Ditto.
> > > * gcc.target/i386/sse-14.c: Ditto.
> > > * gcc.target/i386/sse-22.c: Ditto.
> > > * gcc.target/i386/sse-23.c: Ditto.
> > > * gcc.target/i386/funcspec-56.inc: Add new target attribute.
> > > * lib/target-supports.exp
> > > (check_effective_target_avxvnniint8): New.
> > > * gcc.target/i386/avxvnniint8-1.c: Ditto.
> > > * gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto.
> > > * gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto.
> > > * gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto.
> > > * gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto.
> > > * gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto.
> > > * gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto.
> > >
> > > Co-authored-by: Hongyu Wang <hongyu.wang@intel.com>
> > > Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
> > > ---
> > > gcc/common/config/i386/cpuinfo.h | 2 +
> > > gcc/common/config/i386/i386-common.cc | 22 ++-
> > > gcc/common/config/i386/i386-cpuinfo.h | 1 +
> > > gcc/common/config/i386/i386-isas.h | 2 +
> > > gcc/config.gcc | 2 +-
> > > gcc/config/i386/avxvnniint8intrin.h | 138 ++++++++++++++++++
> > > gcc/config/i386/cpuid.h | 1 +
> > > gcc/config/i386/i386-builtin.def | 14 ++
> > > gcc/config/i386/i386-c.cc | 2 +
> > > gcc/config/i386/i386-isa.def | 1 +
> > > gcc/config/i386/i386-options.cc | 4 +-
> > > gcc/config/i386/i386.opt | 5 +
> > > gcc/config/i386/immintrin.h | 2 +
> > > gcc/config/i386/sse.md | 31 ++++
> > > gcc/doc/extend.texi | 5 +
> > > gcc/doc/invoke.texi | 9 +-
> > > gcc/doc/sourcebuild.texi | 3 +
> > > gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
> > > gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
> > > gcc/testsuite/gcc.target/i386/avx-check.h | 3 +
> > > gcc/testsuite/gcc.target/i386/avxvnniint8-1.c | 43 ++++++
> > > .../gcc.target/i386/avxvnniint8-vpdpbssd-2.c | 72 +++++++++
> > > .../gcc.target/i386/avxvnniint8-vpdpbssds-2.c | 72 +++++++++
> > > .../gcc.target/i386/avxvnniint8-vpdpbsud-2.c | 72 +++++++++
> > > .../gcc.target/i386/avxvnniint8-vpdpbsuds-2.c | 72 +++++++++
> > > .../gcc.target/i386/avxvnniint8-vpdpbuud-2.c | 72 +++++++++
> > > .../gcc.target/i386/avxvnniint8-vpdpbuuds-2.c | 72 +++++++++
> > > gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
> > > gcc/testsuite/gcc.target/i386/sse-12.c | 2 +-
> > > gcc/testsuite/gcc.target/i386/sse-13.c | 2 +-
> > > gcc/testsuite/gcc.target/i386/sse-14.c | 2 +-
> > > gcc/testsuite/gcc.target/i386/sse-22.c | 4 +-
> > > gcc/testsuite/gcc.target/i386/sse-23.c | 2 +-
> > > gcc/testsuite/lib/target-supports.exp | 12 ++
> > > 34 files changed, 738 insertions(+), 14 deletions(-) create mode
> > > 100644 gcc/config/i386/avxvnniint8intrin.h
> > > create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> > > create mode 100644
> > > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> > > create mode 100644
> > > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> > > create mode 100644
> > > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> > > create mode 100644
> > > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> > > create mode 100644
> > > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> > > create mode 100644
> > > gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> > >
> > > diff --git a/gcc/common/config/i386/cpuinfo.h
> > > b/gcc/common/config/i386/cpuinfo.h
> > > index 9bb21c6cacc..bed88003f8e 100644
> > > --- a/gcc/common/config/i386/cpuinfo.h
> > > +++ b/gcc/common/config/i386/cpuinfo.h
> > > @@ -795,6 +795,8 @@ get_available_features (struct __processor_model
> > *cpu_model,
> > > set_feature (FEATURE_AVXVNNI);
> > > if (eax & bit_AVXIFMA)
> > > set_feature (FEATURE_AVXIFMA);
> > > + if (edx & bit_AVXVNNIINT8)
> > > + set_feature (FEATURE_AVXVNNIINT8);
> > > }
> > > if (avx512_usable)
> > > {
> > > diff --git a/gcc/common/config/i386/i386-common.cc
> > > b/gcc/common/config/i386/i386-common.cc
> > > index 4de7906b247..6a2a7e3d25a 100644
> > > --- a/gcc/common/config/i386/i386-common.cc
> > > +++ b/gcc/common/config/i386/i386-common.cc
> > > @@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see
> > > #define OPTION_MASK_ISA2_AMX_TILE_SET
> > OPTION_MASK_ISA2_AMX_TILE
> > > #define OPTION_MASK_ISA2_AMX_INT8_SET
> > OPTION_MASK_ISA2_AMX_INT8
> > > #define OPTION_MASK_ISA2_AMX_BF16_SET
> > OPTION_MASK_ISA2_AMX_BF16
> > > +#define OPTION_MASK_ISA2_AVXVNNIINT8_SET
> > OPTION_MASK_ISA2_AVXVNNIINT8
> > >
> > > /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
> > > as -msse4.2. */
> > > @@ -214,7 +215,7 @@ along with GCC; see the file COPYING3. If not see
> > > (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
> > #define
> > > OPTION_MASK_ISA2_AVX2_UNSET \
> > > (OPTION_MASK_ISA2_AVXIFMA_UNSET |
> > OPTION_MASK_ISA2_AVXVNNI_UNSET \
> > > - | OPTION_MASK_ISA2_AVX512F_UNSET)
> > > + | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET |
> > > + OPTION_MASK_ISA2_AVX512F_UNSET)
> > > #define OPTION_MASK_ISA_AVX512F_UNSET \
> > > (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET
> > \
> > > | OPTION_MASK_ISA_AVX512PF_UNSET |
> > OPTION_MASK_ISA_AVX512ER_UNSET
> > > \ @@ -278,6 +279,7 @@ along with GCC; see the file COPYING3. If not
> > > see #define OPTION_MASK_ISA2_KL_UNSET \
> > > (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
> > #define
> > > OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
> > > +#define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET
> > > +OPTION_MASK_ISA2_AVXVNNIINT8
> > >
> > > /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
> > > as -mno-sse4.1. */
> > > @@ -1142,6 +1144,24 @@ ix86_handle_option (struct gcc_options *opts,
> > > }
> > > return true;
> > >
> > > + case OPT_mavxvnniint8:
> > > + if (value)
> > > + {
> > > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNIINT8_SET;
> > > + opts->x_ix86_isa_flags2_explicit |=
> > > + OPTION_MASK_ISA2_AVXVNNIINT8_SET;
> > > + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
> > > + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
> > > + }
> > > + else
> > > + {
> > > + opts->x_ix86_isa_flags2 &=
> > > + ~OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
> > > + opts->x_ix86_isa_flags2_explicit |=
> > > + OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
> > > + }
> > > + return true;
> > > +
> > > case OPT_mfma:
> > > if (value)
> > > {
> > > diff --git a/gcc/common/config/i386/i386-cpuinfo.h
> > > b/gcc/common/config/i386/i386-cpuinfo.h
> > > index 968f9a56a6c..9a6b92fab79 100644
> > > --- a/gcc/common/config/i386/i386-cpuinfo.h
> > > +++ b/gcc/common/config/i386/i386-cpuinfo.h
> > > @@ -241,6 +241,7 @@ enum processor_features
> > > FEATURE_X86_64_V3,
> > > FEATURE_X86_64_V4,
> > > FEATURE_AVXIFMA,
> > > + FEATURE_AVXVNNIINT8,
> > > CPU_FEATURE_MAX
> > > };
> > >
> > > diff --git a/gcc/common/config/i386/i386-isas.h
> > > b/gcc/common/config/i386/i386-isas.h
> > > index b05b4bb8f0d..8c1f351056c 100644
> > > --- a/gcc/common/config/i386/i386-isas.h
> > > +++ b/gcc/common/config/i386/i386-isas.h
> > > @@ -176,4 +176,6 @@ ISA_NAMES_TABLE_START
> > > ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3,
> > P_X86_64_V3, NULL)
> > > ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4,
> > P_X86_64_V4, NULL)
> > > ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE,
> > > "-mavxifma")
> > > + ISA_NAMES_TABLE_ENTRY("avxvnniint8", FEATURE_AVXVNNIINT8,
> > > + P_NONE, "-mavxvnniint8")
> > > ISA_NAMES_TABLE_END
> > > diff --git a/gcc/config.gcc b/gcc/config.gcc index
> > > 12365abbf86..4df78238910 100644
> > > --- a/gcc/config.gcc
> > > +++ b/gcc/config.gcc
> > > @@ -422,7 +422,7 @@ i[34567]86-*-* | x86_64-*-*)
> > > amxbf16intrin.h x86gprintrin.h uintrintrin.h
> > > hresetintrin.h keylockerintrin.h avxvnniintrin.h
> > > mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
> > > - avxifmaintrin.h"
> > > + avxifmaintrin.h avxvnniint8intrin.h"
> > > ;;
> > > ia64-*-*)
> > > extra_headers=ia64intrin.h
> > > diff --git a/gcc/config/i386/avxvnniint8intrin.h
> > > b/gcc/config/i386/avxvnniint8intrin.h
> > > new file mode 100644
> > > index 00000000000..362e6f65c2a
> > > --- /dev/null
> > > +++ b/gcc/config/i386/avxvnniint8intrin.h
> > > @@ -0,0 +1,138 @@
> > > +/* Copyright (C) 2020 Free Software Foundation, Inc.
> > > +
> > > + This file is part of GCC.
> > > +
> > > + GCC is free software; you can redistribute it and/or modify
> > > + it under the terms of the GNU General Public License as published by
> > > + the Free Software Foundation; either version 3, or (at your option)
> > > + any later version.
> > > +
> > > + GCC is distributed in the hope that it will be useful,
> > > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > > + GNU General Public License for more details.
> > > +
> > > + Under Section 7 of GPL version 3, you are granted additional
> > > + permissions described in the GCC Runtime Library Exception, version
> > > + 3.1, as published by the Free Software Foundation.
> > > +
> > > + You should have received a copy of the GNU General Public License and
> > > + a copy of the GCC Runtime Library Exception along with this program;
> > > + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> > > + <http://www.gnu.org/licenses/>. */
> > > +
> > > +#if !defined _IMMINTRIN_H_INCLUDED
> > > +#error "Never use <avxvnniint8vlintrin.h> directly; include <immintrin.h>
> > instead."
> > > +#endif
> > > +
> > > +#ifndef _AVXVNNIINT8INTRIN_H_INCLUDED #define
> > > +_AVXVNNIINT8INTRIN_H_INCLUDED
> > > +
> > > +#if !defined(__AVXVNNIINT8__)
> > > +#pragma GCC push_options
> > > +#pragma GCC target("avxvnniint8")
> > > +#define __DISABLE_AVXVNNIINT8__
> > > +#endif /* __AVXVNNIINT8__ */
> > > +
> > > +extern __inline __m128i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm_dpbssd_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > > + return (__m128i)
> > > + __builtin_ia32_vpdpbssd128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > > +__B); }
> > > +
> > > +extern __inline __m128i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm_dpbssds_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > > + return (__m128i)
> > > + __builtin_ia32_vpdpbssds128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > > +__B); }
> > > +
> > > +extern __inline __m128i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm_dpbsud_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > > + return (__m128i)
> > > + __builtin_ia32_vpdpbsud128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > > +__B); }
> > > +
> > > +extern __inline __m128i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm_dpbsuds_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > > + return (__m128i)
> > > + __builtin_ia32_vpdpbsuds128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > > +__B); }
> > > +
> > > +extern __inline __m128i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm_dpbuud_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > > + return (__m128i)
> > > + __builtin_ia32_vpdpbuud128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > > +__B); }
> > > +
> > > +extern __inline __m128i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm_dpbuuds_epi32 (__m128i __W, __m128i __A, __m128i __B) {
> > > + return (__m128i)
> > > + __builtin_ia32_vpdpbuuds128 ((__v4si) __W, (__v4si) __A, (__v4si)
> > > +__B); }
> > > +
> > > +extern __inline __m256i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm256_dpbssd_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > > + return (__m256i)
> > > + __builtin_ia32_vpdpbssd256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > > +__B); }
> > > +
> > > +extern __inline __m256i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm256_dpbssds_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > > + return (__m256i)
> > > + __builtin_ia32_vpdpbssds256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > > +__B); }
> > > +
> > > +extern __inline __m256i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm256_dpbsud_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > > + return (__m256i)
> > > + __builtin_ia32_vpdpbsud256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > > +__B); }
> > > +
> > > +extern __inline __m256i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm256_dpbsuds_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > > + return (__m256i)
> > > + __builtin_ia32_vpdpbsuds256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > > +__B); }
> > > +
> > > +extern __inline __m256i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm256_dpbuud_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > > + return (__m256i)
> > > + __builtin_ia32_vpdpbuud256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > > +__B); }
> > > +
> > > +extern __inline __m256i
> > > +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > +_mm256_dpbuuds_epi32 (__m256i __W, __m256i __A, __m256i __B) {
> > > + return (__m256i)
> > > + __builtin_ia32_vpdpbuuds256 ((__v8si) __W, (__v8si) __A, (__v8si)
> > > +__B); }
> > > +
> > > +#ifdef __DISABLE_AVXVNNIINT8__
> > > +#undef __DISABLE_AVXVNNIINT8__
> > > +#pragma GCC pop_options
> > > +#endif /* __DISABLE_AVXVNNIINT8__ */
> > > +
> > > +#endif /* __AVXVNNIINT8INTRIN_H_INCLUDED */
> > > diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index
> > > 9885699efd5..f5fad22149a 100644
> > > --- a/gcc/config/i386/cpuid.h
> > > +++ b/gcc/config/i386/cpuid.h
> > > @@ -49,6 +49,7 @@
> > > #define bit_RDRND (1 << 30)
> > >
> > > /* %edx */
> > > +#define bit_AVXVNNIINT8 (1 << 4)
> > > #define bit_CMPXCHG8B (1 << 8)
> > > #define bit_CMOV (1 << 15)
> > > #define bit_MMX (1 << 23)
> > > diff --git a/gcc/config/i386/i386-builtin.def
> > > b/gcc/config/i386/i386-builtin.def
> > > index 4a89099a00f..e6edae5728b 100644
> > > --- a/gcc/config/i386/i386-builtin.def
> > > +++ b/gcc/config/i386/i386-builtin.def
> > > @@ -2696,6 +2696,20 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI |
> > > OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC
> > > (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0,
> > > CODE_FOR_vpdpwssds_v4si_mask,
> > "__builtin_ia32_vpdpwssds_v4si_mask",
> > > IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int)
> > > V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC
> > (OPTION_MASK_ISA_AVX512VNNI |
> > > OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz,
> > > "__builtin_ia32_vpdpwssds_v4si_maskz",
> > > IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int)
> > > V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
> > >
> > > +/* AVXVNNIINT8 */
> > > +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8,
> > CODE_FOR_vpdpbssd_v8si,
> > > +"__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI,
> > UNKNOWN,
> > > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v8si,
> > > +"__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI,
> > UNKNOWN,
> > > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v8si,
> > > +"__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI,
> > UNKNOWN,
> > > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v8si,
> > > +"__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI,
> > UNKNOWN,
> > > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v8si,
> > > +"__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI,
> > UNKNOWN,
> > > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v8si,
> > > +"__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI,
> > UNKNOWN,
> > > +(int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v4si,
> > > +"__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI,
> > UNKNOWN,
> > > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v4si,
> > > +"__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI,
> > UNKNOWN,
> > > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v4si,
> > > +"__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI,
> > UNKNOWN,
> > > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v4si,
> > > +"__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI,
> > UNKNOWN,
> > > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v4si,
> > > +"__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI,
> > UNKNOWN,
> > > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (0,
> > > +OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v4si,
> > > +"__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI,
> > UNKNOWN,
> > > +(int) V4SI_FTYPE_V4SI_V4SI_V4SI)
> > > +
> > > /* VPCLMULQDQ */
> > > BDESC (OPTION_MASK_ISA_VPCLMULQDQ |
> > OPTION_MASK_ISA_AVX512VL, 0,
> > > CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di",
> > > IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int)
> > V2DI_FTYPE_V2DI_V2DI_INT)
> > > BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0,
> > > CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di",
> > > IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int)
> > V4DI_FTYPE_V4DI_V4DI_INT)
> > > diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> > > index 3494ec035d5..a9a35c0a18a 100644
> > > --- a/gcc/config/i386/i386-c.cc
> > > +++ b/gcc/config/i386/i386-c.cc
> > > @@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT
> > isa_flag,
> > > def_or_undef (parse_in, "__AVXVNNI__");
> > > if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA)
> > > def_or_undef (parse_in, "__AVXIFMA__");
> > > + if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8)
> > > + def_or_undef (parse_in, "__AVXVNNIINT8__");
> > > if (TARGET_IAMCU)
> > > {
> > > def_or_undef (parse_in, "__iamcu"); diff --git
> > > a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index
> > > 6e0254ce418..c95b917c6ce 100644
> > > --- a/gcc/config/i386/i386-isa.def
> > > +++ b/gcc/config/i386/i386-isa.def
> > > @@ -110,3 +110,4 @@ DEF_PTA(WIDEKL)
> > > DEF_PTA(AVXVNNI)
> > > DEF_PTA(AVX512FP16)
> > > DEF_PTA(AVXIFMA)
> > > +DEF_PTA(AVXVNNIINT8)
> > > diff --git a/gcc/config/i386/i386-options.cc
> > > b/gcc/config/i386/i386-options.cc index 5facb64c2a8..3e6d04433a6
> > > 100644
> > > --- a/gcc/config/i386/i386-options.cc
> > > +++ b/gcc/config/i386/i386-options.cc
> > > @@ -227,7 +227,8 @@ static struct ix86_target_opts isa2_opts[] =
> > > { "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
> > > { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
> > > { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
> > > - { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }
> > > + { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA },
> > > + { "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 }
> > > };
> > > static struct ix86_target_opts isa_opts[] = { @@ -1074,6 +1075,7 @@
> > > ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char
> > *p_strings[],
> > > IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
> > > IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
> > > IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
> > > + IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
> > >
> > > /* enum options */
> > > IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git
> > > a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index
> > > 36e28b7063d..53d534f6392 100644
> > > --- a/gcc/config/i386/i386.opt
> > > +++ b/gcc/config/i386/i386.opt
> > > @@ -1219,3 +1219,8 @@ mavxifma
> > > Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save Support MMX,
> > > SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and AVXIFMA
> > > built-in functions and code generation.
> > > +
> > > +mavxvnniint8
> > > +Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save Support
> > MMX,
> > > +SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and
> > > +AVXVNNIINT8 built-in functions and code generation.
> > > diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> > > index e9d4e975243..ddea249d09b 100644
> > > --- a/gcc/config/i386/immintrin.h
> > > +++ b/gcc/config/i386/immintrin.h
> > > @@ -46,6 +46,8 @@
> > >
> > > #include <avxifmaintrin.h>
> > >
> > > +#include <avxvnniint8intrin.h>
> > > +
> > > #include <avx2intrin.h>
> > >
> > > #include <avx512fintrin.h>
> > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index
> > > 331347569ea..49490a213ea 100644
> > > --- a/gcc/config/i386/sse.md
> > > +++ b/gcc/config/i386/sse.md
> > > @@ -200,6 +200,13 @@
> > > UNSPEC_COMPLEX_FCMUL
> > > UNSPEC_COMPLEX_MASK
> > >
> > > + ;; For AVX-VNNI-INT8 support
> > > + UNSPEC_VPDPBSSD
> > > + UNSPEC_VPDPBSSDS
> > > + UNSPEC_VPDPBSUD
> > > + UNSPEC_VPDPBSUDS
> > > + UNSPEC_VPDPBUUD
> > > + UNSPEC_VPDPBUUDS
> > > ])
> > Can we have unified naming conversion for the UNSPECs and those in
> > AVX512(AVX)_VNNI, since they have similar semantics.
> > Maybe change the old UNSPEC_VPMADDUBSWACCD to the "new-style"
> > UNSPEC_VPDPBUSD? Similar for other UNSPEC_VPMADD***.
> I suppose we can do that in this patch to avoid confusion.
Sure.
> > >
> > > (define_c_enum "unspecv" [
> > > @@ -29241,3 +29248,27 @@
> > > gcc_unreachable ();
> > > DONE;
> > > })
> > > +
> > > +(define_int_iterator VPDOTPROD
> > > + [UNSPEC_VPDPBSSD
> > > + UNSPEC_VPDPBSSDS
> > > + UNSPEC_VPDPBSUD
> > > + UNSPEC_VPDPBSUDS
> > > + UNSPEC_VPDPBUUD
> > > + UNSPEC_VPDPBUUDS])
> > > +
> > > +(define_int_attr vpdotprodtype
> > > + [(UNSPEC_VPDPBSSD "bssd") (UNSPEC_VPDPBSSDS "bssds")
> > > + (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
> > > + (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
> > > +
> > > +(define_insn "vpdp<vpdotprodtype>_<mode>"
> > > + [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
> > > + (unspec:VI4_AVX
> > > + [(match_operand:VI4_AVX 1 "register_operand" "0")
> > > + (match_operand:VI4_AVX 2 "register_operand" "x")
> > > + (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")]
> > > + VPDOTPROD))]
> > > + "TARGET_AVXVNNIINT8"
> > > + "vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}"
> > > + [(set_attr "prefix" "vex")])
> > > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index
> > > edecf5c0070..9a8de9fc226 100644
> > > --- a/gcc/doc/extend.texi
> > > +++ b/gcc/doc/extend.texi
> > > @@ -7065,6 +7065,11 @@ Enable/disable the generation of the AVXVNNI
> > instructions.
> > > @cindex @code{target("avxifma")} function attribute, x86
> > > Enable/disable the generation of the AVXIFMA instructions.
> > >
> > > +@item avxvnniint8
> > > +@itemx no-avxvnniint8
> > > +@cindex @code{target("avxvnniint8")} function attribute, x86
> > > +Enable/disable the generation of the AVXVNNIINT8 instructions.
> > > +
> > > @item cld
> > > @itemx no-cld
> > > @cindex @code{target("cld")} function attribute, x86 diff --git
> > > a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index
> > > 886fc1d0164..d4ff7549bf3 100644
> > > --- a/gcc/doc/invoke.texi
> > > +++ b/gcc/doc/invoke.texi
> > > @@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options.
> > > -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid
> > @gol
> > > -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
> > > -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
> > > --mavx512fp16 -mavxifma @gol
> > > +-mavx512fp16 -mavxifma -mavxvnniint8 @gol
> > > -mcldemote -mms-bitfields -mno-align-stringops
> > > -minline-all-stringops @gol -minline-stringops-dynamically
> > > -mstringop-strategy=@var{alg} @gol -mkl -mwidekl @gol @@ -32896,6
> > > +32896,9 @@ preferred alignment to @option{-mpreferred-stack-
> > boundary=2}.
> > > @need 200
> > > @itemx -mavxifma
> > > @opindex mavxifma
> > > +@need 200
> > > +@itemx -mavxvnniint8
> > > +@opindex mavxvnniint8
> > > These switches enable the use of instructions in the MMX, SSE, SSE2,
> > > SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F,
> > > AVX512PF, AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ,
> > > AVX512IFMA, AVX512VBMI, SHA, @@ -32906,8 +32909,8 @@ XSAVEOPT,
> > XSAVEC,
> > > XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI,
> > VAES,
> > > WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B,
> > AVX512BF16,
> > > ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI,
> > AVX5124VNNIW,
> > > SERIALIZE, UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL,
> > > AVXVNNI, AVX512FP16, -AVXIFMA or CLDEMOTE extended instruction sets.
> > Each has a corresponding -@option{-mno-} option to disable use of these
> > instructions.
> > > +AVXIFMA, AVXVNNIINT8 or CLDEMOTE extended instruction sets. Each
> > has
> > > +a corresponding @option{-mno-} option to disable use of these
> > instructions.
> > >
> > > These extensions are also available as built-in functions: see
> > > @ref{x86 Built-in Functions}, for details of the functions enabled
> > > and diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> > > index 0173acf4a65..e21a1d381e0 100644
> > > --- a/gcc/doc/sourcebuild.texi
> > > +++ b/gcc/doc/sourcebuild.texi
> > > @@ -2493,6 +2493,9 @@ Target supports the execution of
> > @code{avx512vp2intersect} instructions.
> > > @item avxifma
> > > Target supports the execution of @code{avxifma} instructions.
> > >
> > > +@item avxvnniint8
> > > +Target supports the execution of @code{avxvnniint8} instructions.
> > > +
> > > @item amx_tile
> > > Target supports the execution of @code{amx-tile} instructions.
> > >
> > > diff --git a/gcc/testsuite/g++.dg/other/i386-2.C
> > > b/gcc/testsuite/g++.dg/other/i386-2.C
> > > index 5388606779b..ebd01fe47bc 100644
> > > --- a/gcc/testsuite/g++.dg/other/i386-2.C
> > > +++ b/gcc/testsuite/g++.dg/other/i386-2.C
> > > @@ -1,5 +1,5 @@
> > > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> > > -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx
> > > -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -
> > mbmi2
> > > -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -
> > mprfchw
> > > -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf
> > > -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq
> > > -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2
> > > -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx
> > > -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig
> > > -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize
> > > -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni
> > > -mavx512fp16 -mavxifma" } */
> > > +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx
> > > +-mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi
> > > +-mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -
> > mrdseed
> > > +-mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd
> > > +-mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt
> > > +-mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi
> > > +-mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -
> > mclwb
> > > +-mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -
> > mpconfig
> > > +-mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize
> > > +-mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni
> > > +-mavx512fp16 -mavxifma -mavxvnniint8" } */
> > >
> > > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
> > > xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> > > diff --git a/gcc/testsuite/g++.dg/other/i386-3.C
> > > b/gcc/testsuite/g++.dg/other/i386-3.C
> > > index 86cedd3d32f..b66498f1d4c 100644
> > > --- a/gcc/testsuite/g++.dg/other/i386-3.C
> > > +++ b/gcc/testsuite/g++.dg/other/i386-3.C
> > > @@ -1,5 +1,5 @@
> > > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> > > -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow
> > > -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -
> > mbmi
> > > -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed
> > > -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd
> > > -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt
> > > -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi
> > > -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -
> > mclwb
> > > -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig
> > > -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize
> > > -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni
> > > -mavx512fp16 -mavxifma" } */
> > > +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow
> > > +-mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -
> > mbmi
> > > +-mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -
> > mrdseed
> > > +-mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd
> > > +-mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt
> > > +-mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi
> > > +-mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -
> > mclwb
> > > +-mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -
> > mpconfig
> > > +-mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize
> > > +-mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni
> > > +-mavx512fp16 -mavxifma -mavxvnniint8" } */
> > >
> > > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
> > > xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> > > diff --git a/gcc/testsuite/gcc.target/i386/avx-check.h
> > > b/gcc/testsuite/gcc.target/i386/avx-check.h
> > > index 24ee6ab4efd..77507ca2edc 100644
> > > --- a/gcc/testsuite/gcc.target/i386/avx-check.h
> > > +++ b/gcc/testsuite/gcc.target/i386/avx-check.h
> > > @@ -25,6 +25,9 @@ main ()
> > > && avx_os_support ()
> > > #ifdef AVXIFMA
> > > && __builtin_cpu_supports ("avxifma")
> > > +#endif
> > > +#ifdef AVXVNNIINT8
> > > + && __builtin_cpu_supports ("avxvnniint8")
> > > #endif
> > > )
> > > {
> > > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> > > b/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> > > new file mode 100644
> > > index 00000000000..d6942f34d6e
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
> > > @@ -0,0 +1,43 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-mavxvnniint8 -O2" } */
> > > +/* { dg-final { scan-assembler-times "vpdpbssd\[
> > > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> > 9\]+\[^\n\r]*%ymm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbssd\[
> > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> > 9\]+\[^\n\r]*%xmm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbssds\[
> > > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> > 9\]+\[^\n\r]*%ymm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbssds\[
> > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> > 9\]+\[^\n\r]*%xmm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbsud\[
> > > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> > 9\]+\[^\n\r]*%ymm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbsud\[
> > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> > 9\]+\[^\n\r]*%xmm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbsuds\[
> > > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> > 9\]+\[^\n\r]*%ymm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbsuds\[
> > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> > 9\]+\[^\n\r]*%xmm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbuud\[
> > > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> > 9\]+\[^\n\r]*%ymm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbuud\[
> > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> > 9\]+\[^\n\r]*%xmm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbuuds\[
> > > +\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-
> > 9\]+\[^\n\r]*%ymm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +/* { dg-final { scan-assembler-times "vpdpbuuds\[
> > > +\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-
> > 9\]+\[^\n\r]*%xmm\[0-9\]
> > > ++(?:\n|\[ \\t\]+#)" 1 } } */
> > > +
> > > +
> > > +#include <immintrin.h>
> > > +
> > > +volatile __m256i x,y,z;
> > > +volatile __m128i x_,y_,z_;
> > > +volatile __mmask8 m;
> > > +
> > > +void extern
> > > +avxvnniint8_test (void)
> > > +{
> > > + x = _mm256_dpbssd_epi32 (x, y, z);
> > > + x_ = _mm_dpbssd_epi32 (x_, y_, z_);
> > > +
> > > + x = _mm256_dpbssds_epi32 (x, y, z); x_ = _mm_dpbssds_epi32 (x_,
> > > + y_, z_);
> > > +
> > > + x = _mm256_dpbsud_epi32 (x, y, z);
> > > + x_ = _mm_dpbsud_epi32 (x_, y_, z_);
> > > +
> > > + x = _mm256_dpbsuds_epi32 (x, y, z); x_ = _mm_dpbsuds_epi32 (x_,
> > > + y_, z_);
> > > +
> > > + x = _mm256_dpbuud_epi32 (x, y, z);
> > > + x_ = _mm_dpbuud_epi32 (x_, y_, z_);
> > > +
> > > + x = _mm256_dpbuuds_epi32 (x, y, z);
> > > + x_ = _mm_dpbuuds_epi32 (x_, y_, z_); }
> > > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> > > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> > > new file mode 100644
> > > index 00000000000..5016de39621
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
> > > @@ -0,0 +1,72 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -mavxvnniint8" } */
> > > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > > +#ifndef CHECK #define CHECK "avx-check.h"
> > > +#endif
> > > +
> > > +#ifndef TEST
> > > +#define TEST avx_test
> > > +#endif
> > > +
> > > +#include CHECK
> > > +
> > > +static void
> > > +CALC (int *r, int *dst, char *s1, char *s2, int size) {
> > > + short tempres[32];
> > > + for (int i = 0; i < size; i++) {
> > > + tempres[i] = (short) s1[i] * (short) s2[i];
> > > + }
> > > + for (int i = 0; i < size / 4; i++) {
> > > + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> > > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > > + r[i] = test;
> > > + }
> > > +}
> > > +
> > > +void
> > > +TEST (void)
> > > +{
> > > + int i;
> > > + union256i_d res_256;
> > > + union256i_b src2_256;
> > > + union256i_b src1_256;
> > > + int res_ref_256[8];
> > > +
> > > + for (i = 0; i < 32; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_256.a[i] = 10 + 3 * i + sign;
> > > + src2_256.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 8; i++)
> > > + res_256.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > > + res_256.x = _mm256_dpbssd_epi32 (res_256.x, src1_256.x, src2_256.x);
> > > + if (check_union256i_d (res_256, res_ref_256))
> > > + abort ();
> > > +
> > > + union128i_d res_128;
> > > + union128i_b src2_128;
> > > + union128i_b src1_128;
> > > + int res_ref_128[4];
> > > +
> > > + for (i = 0; i < 16; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > > + src2_128.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 4; i++)
> > > + res_128.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > > + res_128.x = _mm_dpbssd_epi32 (res_128.x, src1_128.x, src2_128.x);
> > > + if (check_union128i_d (res_128, res_ref_128))
> > > + abort ();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> > > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> > > new file mode 100644
> > > index 00000000000..6de5062e917
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
> > > @@ -0,0 +1,72 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -mavxvnniint8" } */
> > > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > > +#ifndef CHECK #define CHECK "avx-check.h"
> > > +#endif
> > > +
> > > +#ifndef TEST
> > > +#define TEST avx_test
> > > +#endif
> > > +
> > > +#include CHECK
> > > +
> > > +static void
> > > +CALC (int *r, int *dst, char *s1, char *s2, int size) {
> > > + short tempres[32];
> > > + for (int i = 0; i < size; i++) {
> > > + tempres[i] = (short) s1[i] * (short) s2[i];
> > > + }
> > > + for (int i = 0; i < size / 4; i++) {
> > > + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> > > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > > + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
> > > + }
> > > +}
> > > +
> > > +void
> > > +TEST (void)
> > > +{
> > > + int i;
> > > + union256i_d res_256;
> > > + union256i_b src2_256;
> > > + union256i_b src1_256;
> > > + int res_ref_256[8];
> > > +
> > > + for (i = 0; i < 32; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_256.a[i] = 10 + 3 * i + sign;
> > > + src2_256.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 8; i++)
> > > + res_256.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > > + res_256.x = _mm256_dpbssds_epi32 (res_256.x, src1_256.x,
> > > + src2_256.x); if (check_union256i_d (res_256, res_ref_256))
> > > + abort ();
> > > +
> > > + union128i_d res_128;
> > > + union128i_b src2_128;
> > > + union128i_b src1_128;
> > > + int res_ref_128[4];
> > > +
> > > + for (i = 0; i < 16; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > > + src2_128.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 4; i++)
> > > + res_128.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > > + res_128.x = _mm_dpbssds_epi32 (res_128.x, src1_128.x, src2_128.x);
> > > + if (check_union128i_d (res_128, res_ref_128))
> > > + abort ();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> > > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> > > new file mode 100644
> > > index 00000000000..6e4ffd1c7be
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
> > > @@ -0,0 +1,72 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -mavxvnniint8" } */
> > > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > > +#ifndef CHECK #define CHECK "avx-check.h"
> > > +#endif
> > > +
> > > +#ifndef TEST
> > > +#define TEST avx_test
> > > +#endif
> > > +
> > > +#include CHECK
> > > +
> > > +static void
> > > +CALC (int *r, int *dst, char *s1, unsigned char *s2, int size) {
> > > + short tempres[32];
> > > + for (int i = 0; i < size; i++) {
> > > + tempres[i] = (short) s1[i] * (unsigned short) s2[i];
> > > + }
> > > + for (int i = 0; i < size / 4; i++) {
> > > + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> > > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > > + r[i] = test;
> > > + }
> > > +}
> > > +
> > > +void
> > > +TEST (void)
> > > +{
> > > + int i;
> > > + union256i_d res_256;
> > > + union256i_b src1_256;
> > > + union256i_ub src2_256;
> > > + int res_ref_256[8];
> > > +
> > > + for (i = 0; i < 32; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_256.a[i] = 10 + 3 * i + sign;
> > > + src2_256.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 8; i++)
> > > + res_256.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > > + res_256.x = _mm256_dpbsud_epi32 (res_256.x, src1_256.x, src2_256.x);
> > > + if (check_union256i_d (res_256, res_ref_256))
> > > + abort ();
> > > +
> > > + union128i_d res_128;
> > > + union128i_b src1_128;
> > > + union128i_ub src2_128;
> > > + int res_ref_128[4];
> > > +
> > > + for (i = 0; i < 16; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > > + src2_128.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 4; i++)
> > > + res_128.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > > + res_128.x = _mm_dpbsud_epi32 (res_128.x, src1_128.x, src2_128.x);
> > > + if (check_union128i_d (res_128, res_ref_128))
> > > + abort ();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> > > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> > > new file mode 100644
> > > index 00000000000..ad4b6047ecd
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
> > > @@ -0,0 +1,72 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -mavxvnniint8" } */
> > > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > > +#ifndef CHECK #define CHECK "avx-check.h"
> > > +#endif
> > > +
> > > +#ifndef TEST
> > > +#define TEST avx_test
> > > +#endif
> > > +
> > > +#include CHECK
> > > +
> > > +static void
> > > +CALC (int *r, int *dst, char *s1, unsigned char *s2, int size) {
> > > + short tempres[32];
> > > + for (int i = 0; i < size; i++) {
> > > + tempres[i] = (short) s1[i] * (unsigned short) s2[i];
> > > + }
> > > + for (int i = 0; i < size / 4; i++) {
> > > + long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
> > > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > > + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
> > > + }
> > > +}
> > > +
> > > +void
> > > +TEST (void)
> > > +{
> > > + int i;
> > > + union256i_d res_256;
> > > + union256i_b src1_256;
> > > + union256i_ub src2_256;
> > > + int res_ref_256[8];
> > > +
> > > + for (i = 0; i < 32; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_256.a[i] = 10 + 3 * i + sign;
> > > + src2_256.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 8; i++)
> > > + res_256.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > > + res_256.x = _mm256_dpbsuds_epi32 (res_256.x, src1_256.x,
> > > + src2_256.x); if (check_union256i_d (res_256, res_ref_256))
> > > + abort ();
> > > +
> > > + union128i_d res_128;
> > > + union128i_b src1_128;
> > > + union128i_ub src2_128;
> > > + int res_ref_128[4];
> > > +
> > > + for (i = 0; i < 16; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > > + src2_128.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 4; i++)
> > > + res_128.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > > + res_128.x = _mm_dpbsuds_epi32 (res_128.x, src1_128.x, src2_128.x);
> > > + if (check_union128i_d (res_128, res_ref_128))
> > > + abort ();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> > > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> > > new file mode 100644
> > > index 00000000000..6590915a459
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
> > > @@ -0,0 +1,72 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -mavxvnniint8" } */
> > > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > > +#ifndef CHECK #define CHECK "avx-check.h"
> > > +#endif
> > > +
> > > +#ifndef TEST
> > > +#define TEST avx_test
> > > +#endif
> > > +
> > > +#include CHECK
> > > +
> > > +static void
> > > +CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned
> > > +char *s2, int size) {
> > > + unsigned short tempres[32];
> > > + for (int i = 0; i < size; i++) {
> > > + tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
> > > + }
> > > + for (int i = 0; i < size / 4; i++) {
> > > + unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4
> > + 1]
> > > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > > + r[i] = test;
> > > + }
> > > +}
> > > +
> > > +void
> > > +TEST (void)
> > > +{
> > > + int i;
> > > + union256i_ud res_256;
> > > + union256i_ub src2_256;
> > > + union256i_ub src1_256;
> > > + unsigned int res_ref_256[8];
> > > +
> > > + for (i = 0; i < 32; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_256.a[i] = 10 + 3 * i + sign;
> > > + src2_256.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 8; i++)
> > > + res_256.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > > + res_256.x = _mm256_dpbuud_epi32 (res_256.x, src1_256.x, src2_256.x);
> > > + if (check_union256i_ud (res_256, res_ref_256))
> > > + abort ();
> > > +
> > > + union128i_ud res_128;
> > > + union128i_ub src2_128;
> > > + union128i_ub src1_128;
> > > + unsigned int res_ref_128[4];
> > > +
> > > + for (i = 0; i < 16; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > > + src2_128.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 4; i++)
> > > + res_128.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > > + res_128.x = _mm_dpbuud_epi32 (res_128.x, src1_128.x, src2_128.x);
> > > + if (check_union128i_ud (res_128, res_ref_128))
> > > + abort ();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> > > b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> > > new file mode 100644
> > > index 00000000000..970e4a5d408
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
> > > @@ -0,0 +1,72 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -mavxvnniint8" } */
> > > +/* { dg-require-effective-target avxvnniint8 } */ #define AVXVNNIINT8
> > > +#ifndef CHECK #define CHECK "avx-check.h"
> > > +#endif
> > > +
> > > +#ifndef TEST
> > > +#define TEST avx_test
> > > +#endif
> > > +
> > > +#include CHECK
> > > +
> > > +static void
> > > +CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned
> > > +char *s2, int size) {
> > > + unsigned short tempres[32];
> > > + for (int i = 0; i < size; i++) {
> > > + tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
> > > + }
> > > + for (int i = 0; i < size / 4; i++) {
> > > + unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4
> > + 1]
> > > + + tempres[i * 4 + 2] + tempres[i * 4 + 3];
> > > + r[i] = test > 0xFFFFFFFF ? 0xFFFFFFFF : test;
> > > + }
> > > +}
> > > +
> > > +void
> > > +TEST (void)
> > > +{
> > > + int i;
> > > + union256i_ud res_256;
> > > + union256i_ub src2_256;
> > > + union256i_ub src1_256;
> > > + unsigned int res_ref_256[8];
> > > +
> > > + for (i = 0; i < 32; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_256.a[i] = 10 + 3 * i + sign;
> > > + src2_256.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 8; i++)
> > > + res_256.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
> > > + res_256.x = _mm256_dpbuuds_epi32 (res_256.x, src1_256.x,
> > > + src2_256.x); if (check_union256i_ud (res_256, res_ref_256))
> > > + abort ();
> > > +
> > > + union128i_ud res_128;
> > > + union128i_ub src2_128;
> > > + union128i_ub src1_128;
> > > + unsigned int res_ref_128[4];
> > > +
> > > + for (i = 0; i < 16; i++)
> > > + {
> > > + int sign = i % 2 ? 1 : -1;
> > > + src1_128.a[i] = 10 + 3 * i * i + sign;
> > > + src2_128.a[i] = sign * 10 * i * i;
> > > + }
> > > +
> > > + for (i = 0; i < 4; i++)
> > > + res_128.a[i] = 0x7fffffff;
> > > +
> > > + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
> > > + res_128.x = _mm_dpbuuds_epi32 (res_128.x, src1_128.x, src2_128.x);
> > > + if (check_union128i_ud (res_128, res_ref_128))
> > > + abort ();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> > > b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> > > index 466555c0d06..a681bffe3e7 100644
> > > --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> > > +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> > > @@ -81,6 +81,7 @@ extern void test_widekl (void)
> > __attribute__((__target__("widekl")));
> > > extern void test_avxvnni (void)
> > __attribute__((__target__("avxvnni")));
> > > extern void test_avx512fp16 (void)
> > __attribute__((__target__("avx512fp16")));
> > > extern void test_avxifma (void)
> > __attribute__((__target__("avxifma")));
> > > +extern void test_avxvnniint8 (void)
> > __attribute__((__target__("avxvnniint8")));
> > >
> > > extern void test_no_sgx (void) __attribute__((__target__("no-
> > sgx")));
> > > extern void test_no_avx5124fmaps(void)
> > __attribute__((__target__("no-avx5124fmaps")));
> > > @@ -163,6 +164,7 @@ extern void test_no_widekl (void)
> > __attribute__((__target__("no-widekl")));
> > > extern void test_no_avxvnni (void) __attribute__((__target__("no-
> > avxvnni")));
> > > extern void test_no_avx512fp16 (void)
> > __attribute__((__target__("no-avx512fp16")));
> > > extern void test_no_avxifma (void) __attribute__((__target__("no-
> > avxifma")));
> > > +extern void test_no_avxvnniint8 (void)
> > __attribute__((__target__("no-avxvnniint8")));
> > >
> > > extern void test_arch_nocona (void)
> > __attribute__((__target__("arch=nocona")));
> > > extern void test_arch_core2 (void)
> > __attribute__((__target__("arch=core2")));
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c
> > > b/gcc/testsuite/gcc.target/i386/sse-12.c
> > > index fde56261d8f..ddde2df6657 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse-12.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse-12.c
> > > @@ -3,7 +3,7 @@
> > > popcntintrin.h gfniintrin.h and mm_malloc.h are usable
> > > with -O -std=c89 -pedantic-errors. */
> > > /* { dg-do compile } */
> > > -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a
> > > -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm
> > > -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -
> > mrtm
> > > -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er
> > > -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves
> > > -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi
> > > -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw
> > > -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -
> > mgfni
> > > -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd
> > > -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8
> > > -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */
> > > +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a
> > > +-m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -
> > mabm
> > > +-mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma
> > > +-mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -
> > mavx512er
> > > +-mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves
> > > +-mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi
> > > +-mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw
> > > +-mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -
> > mgfni
> > > +-mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd
> > > +-mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8
> > > +-mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8" } */
> > >
> > > #include <x86intrin.h>
> > >
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c
> > > b/gcc/testsuite/gcc.target/i386/sse-13.c
> > > index bb29555babe..2b293216c6f 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> > > @@ -1,5 +1,5 @@
> > > /* { dg-do compile } */
> > > -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8
> > > -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -
> > mpopcnt
> > > -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -
> > mfma
> > > -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -
> > mavx512er
> > > -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves
> > > -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi
> > > -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw
> > > -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -
> > mpku
> > > -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -
> > mavx512bf16
> > > -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl
> > > -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
> > > +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8
> > > +-msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -
> > mpopcnt
> > > +-mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c
> > > +-mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f
> > > +-mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec
> > > +-mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -
> > mavx512vbmi
> > > +-mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw
> > > +-mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -
> > mpku
> > > +-msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -
> > mavx512bf16
> > > +-menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -
> > mkl
> > > +-mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
> > > /* { dg-add-options bind_pic_locally } */
> > >
> > > #include <mm_malloc.h>
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c
> > > b/gcc/testsuite/gcc.target/i386/sse-14.c
> > > index f2701ddaaf9..78b51048b90 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse-14.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse-14.c
> > > @@ -1,5 +1,5 @@
> > > /* { dg-do compile } */
> > > -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8
> > > -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -
> > mpopcnt
> > > -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -
> > mfma
> > > -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -
> > mavx512er
> > > -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves
> > > -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma
> > > -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw
> > > -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -
> > mgfni
> > > -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd
> > > -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8
> > > -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
> > > +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8
> > > +-msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -
> > mpopcnt
> > > +-mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c
> > > +-mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f
> > > +-mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec
> > > +-mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -
> > mavx512ifma
> > > +-mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw
> > > +-mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -
> > mgfni
> > > +-mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd
> > > +-mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8
> > > +-mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma
> > > +-mavxvnniint8" } */
> > > /* { dg-add-options bind_pic_locally } */
> > >
> > > #include <mm_malloc.h>
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c
> > > b/gcc/testsuite/gcc.target/i386/sse-22.c
> > > index 3d196975b1e..cc1c8cfa4be 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse-22.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse-22.c
> > > @@ -103,7 +103,7 @@
> > >
> > >
> > > #ifndef DIFFERENT_PRAGMAS
> > > -#pragma GCC target
> > > ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,t
> > > bm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f
> > > ,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512d
> > q
> > > ,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512
> > vpo
> > > pcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxld
> > > trk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
> > > +#pragma GCC target
> > > +("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,
> > > +tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx51
> > >
> > +2f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx51
> > >
> > +2dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx
> > 51
> > > +2vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,
> > > +tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxi
> > > +fma,avxvnniint8")
> > > #endif
> > >
> > > /* Following intrinsics require immediate arguments. They @@ -220,7
> > > +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
> > >
> > > /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA)
> > */
> > > #ifdef DIFFERENT_PRAGMAS -#pragma GCC target
> > > ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,
> > >
> > sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5
> > 1
> > >
> > 24fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx5
> > > 12vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl
> > > ,avxvnni,avx512fp16,avxifma")
> > > +#pragma GCC target
> > > +("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf
> > >
> > +,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,av
> > x
> > >
> > +5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,a
> > > +vx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,wi
> > > +dekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> > > #endif
> > > #include <immintrin.h>
> > > test_1 (_cvtss_sh, unsigned short, float, 1) diff --git
> > > a/gcc/testsuite/gcc.target/i386/sse-23.c
> > > b/gcc/testsuite/gcc.target/i386/sse-23.c
> > > index d3a233f90fc..270f4483491 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> > > @@ -843,6 +843,6 @@
> > > #define __builtin_ia32_vpclmulqdq_v2di(A, B, C)
> > > __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define
> > > __builtin_ia32_vpclmulqdq_v8di(A, B, C)
> > > __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
> > >
> > > -#pragma GCC target
> > > ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,t
> > > bm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx
> > > 512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflusho
> > >
> > pt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5
> > 1
> > > 24vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vb
> > >
> > mi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512v
> > > p2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,av
> > > xvnni,avx512fp16,avxifma")
> > > +#pragma GCC target
> > > +("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,
> > > +tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,a
> > > +vx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflu
> > >
> > +shopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,
> > a
> > > +vx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx
> > >
> > +512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,
> > a
> > > +vx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,wi
> > > +dekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> > >
> > > #include <x86intrin.h>
> > > diff --git a/gcc/testsuite/lib/target-supports.exp
> > > b/gcc/testsuite/lib/target-supports.exp
> > > index 69de3b96bfc..64ccfc746bd 100644
> > > --- a/gcc/testsuite/lib/target-supports.exp
> > > +++ b/gcc/testsuite/lib/target-supports.exp
> > > @@ -9518,6 +9518,18 @@ proc check_effective_target_avxifma { } {
> > > } "-O0 -mavxifma" ]
> > > }
> > >
> > > +# Return 1 if avxvnniint8 instructions can be compiled.
> > > +proc check_effective_target_avxvnniint8 { } {
> > > + return [check_no_compiler_messages avxvnniint8 object {
> > > + typedef int __v8si __attribute__ ((__vector_size__ (32)));
> > > + __v8si
> > > + _mm256_dpbssd_epi32 (__v8si __A, __v8si __B, __v8si __C)
> > > + {
> > > + return __builtin_ia32_vpdpbssd256 (__A, __B, __C);
> > > + }
> > > + } "-O0 -mavxvnniint8" ]
> > > +}
> > > +
> > > # Return 1 if sse instructions can be compiled.
> > > proc check_effective_target_sse { } {
> > > return [check_no_compiler_messages sse object {
> > > --
> > > 2.18.1
> > >
> >
> >
> > --
> > BR,
> > Hongtao
@@ -795,6 +795,8 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_AVXVNNI);
if (eax & bit_AVXIFMA)
set_feature (FEATURE_AVXIFMA);
+ if (edx & bit_AVXVNNIINT8)
+ set_feature (FEATURE_AVXVNNIINT8);
}
if (avx512_usable)
{
@@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
+#define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
@@ -214,7 +215,7 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
#define OPTION_MASK_ISA2_AVX2_UNSET \
(OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
- | OPTION_MASK_ISA2_AVX512F_UNSET)
+ | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
#define OPTION_MASK_ISA_AVX512F_UNSET \
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
| OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
@@ -278,6 +279,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_KL_UNSET \
(OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
+#define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET OPTION_MASK_ISA2_AVXVNNIINT8
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
@@ -1142,6 +1144,24 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mavxvnniint8:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNIINT8_SET;
+ opts->x_ix86_isa_flags2_explicit |=
+ OPTION_MASK_ISA2_AVXVNNIINT8_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &=
+ ~OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
+ opts->x_ix86_isa_flags2_explicit |=
+ OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
@@ -241,6 +241,7 @@ enum processor_features
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
FEATURE_AVXIFMA,
+ FEATURE_AVXVNNIINT8,
CPU_FEATURE_MAX
};
@@ -176,4 +176,6 @@ ISA_NAMES_TABLE_START
ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma")
+ ISA_NAMES_TABLE_ENTRY("avxvnniint8", FEATURE_AVXVNNIINT8,
+ P_NONE, "-mavxvnniint8")
ISA_NAMES_TABLE_END
@@ -422,7 +422,7 @@ i[34567]86-*-* | x86_64-*-*)
amxbf16intrin.h x86gprintrin.h uintrintrin.h
hresetintrin.h keylockerintrin.h avxvnniintrin.h
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
- avxifmaintrin.h"
+ avxifmaintrin.h avxvnniint8intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
new file mode 100644
@@ -0,0 +1,138 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avxvnniint8vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVXVNNIINT8INTRIN_H_INCLUDED
+#define _AVXVNNIINT8INTRIN_H_INCLUDED
+
+#if !defined(__AVXVNNIINT8__)
+#pragma GCC push_options
+#pragma GCC target("avxvnniint8")
+#define __DISABLE_AVXVNNIINT8__
+#endif /* __AVXVNNIINT8__ */
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbssd_epi32 (__m128i __W, __m128i __A, __m128i __B)
+{
+ return (__m128i)
+ __builtin_ia32_vpdpbssd128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbssds_epi32 (__m128i __W, __m128i __A, __m128i __B)
+{
+ return (__m128i)
+ __builtin_ia32_vpdpbssds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbsud_epi32 (__m128i __W, __m128i __A, __m128i __B)
+{
+ return (__m128i)
+ __builtin_ia32_vpdpbsud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbsuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
+{
+ return (__m128i)
+ __builtin_ia32_vpdpbsuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbuud_epi32 (__m128i __W, __m128i __A, __m128i __B)
+{
+ return (__m128i)
+ __builtin_ia32_vpdpbuud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbuuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
+{
+ return (__m128i)
+ __builtin_ia32_vpdpbuuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbssd_epi32 (__m256i __W, __m256i __A, __m256i __B)
+{
+ return (__m256i)
+ __builtin_ia32_vpdpbssd256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbssds_epi32 (__m256i __W, __m256i __A, __m256i __B)
+{
+ return (__m256i)
+ __builtin_ia32_vpdpbssds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbsud_epi32 (__m256i __W, __m256i __A, __m256i __B)
+{
+ return (__m256i)
+ __builtin_ia32_vpdpbsud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbsuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
+{
+ return (__m256i)
+ __builtin_ia32_vpdpbsuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbuud_epi32 (__m256i __W, __m256i __A, __m256i __B)
+{
+ return (__m256i)
+ __builtin_ia32_vpdpbuud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbuuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
+{
+ return (__m256i)
+ __builtin_ia32_vpdpbuuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
+}
+
+#ifdef __DISABLE_AVXVNNIINT8__
+#undef __DISABLE_AVXVNNIINT8__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVXVNNIINT8__ */
+
+#endif /* __AVXVNNIINT8INTRIN_H_INCLUDED */
@@ -49,6 +49,7 @@
#define bit_RDRND (1 << 30)
/* %edx */
+#define bit_AVXVNNIINT8 (1 << 4)
#define bit_CMPXCHG8B (1 << 8)
#define bit_CMOV (1 << 15)
#define bit_MMX (1 << 23)
@@ -2696,6 +2696,20 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
+/* AVXVNNIINT8 */
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v8si, "__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v8si, "__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v8si, "__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v8si, "__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v8si, "__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v8si, "__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v4si, "__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v4si, "__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v4si, "__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v4si, "__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v4si, "__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v4si, "__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+
/* VPCLMULQDQ */
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
@@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__AVXVNNI__");
if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA)
def_or_undef (parse_in, "__AVXIFMA__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8)
+ def_or_undef (parse_in, "__AVXVNNIINT8__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
@@ -110,3 +110,4 @@ DEF_PTA(WIDEKL)
DEF_PTA(AVXVNNI)
DEF_PTA(AVX512FP16)
DEF_PTA(AVXIFMA)
+DEF_PTA(AVXVNNIINT8)
@@ -227,7 +227,8 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
{ "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
- { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }
+ { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA },
+ { "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -1074,6 +1075,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
+ IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -1219,3 +1219,8 @@ mavxifma
Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
AVXIFMA built-in functions and code generation.
+
+mavxvnniint8
+Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and
+AVXVNNIINT8 built-in functions and code generation.
@@ -46,6 +46,8 @@
#include <avxifmaintrin.h>
+#include <avxvnniint8intrin.h>
+
#include <avx2intrin.h>
#include <avx512fintrin.h>
@@ -200,6 +200,13 @@
UNSPEC_COMPLEX_FCMUL
UNSPEC_COMPLEX_MASK
+ ;; For AVX-VNNI-INT8 support
+ UNSPEC_VPDPBSSD
+ UNSPEC_VPDPBSSDS
+ UNSPEC_VPDPBSUD
+ UNSPEC_VPDPBSUDS
+ UNSPEC_VPDPBUUD
+ UNSPEC_VPDPBUUDS
])
(define_c_enum "unspecv" [
@@ -29241,3 +29248,27 @@
gcc_unreachable ();
DONE;
})
+
+(define_int_iterator VPDOTPROD
+ [UNSPEC_VPDPBSSD
+ UNSPEC_VPDPBSSDS
+ UNSPEC_VPDPBSUD
+ UNSPEC_VPDPBSUDS
+ UNSPEC_VPDPBUUD
+ UNSPEC_VPDPBUUDS])
+
+(define_int_attr vpdotprodtype
+ [(UNSPEC_VPDPBSSD "bssd") (UNSPEC_VPDPBSSDS "bssds")
+ (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
+ (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
+
+(define_insn "vpdp<vpdotprodtype>_<mode>"
+ [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
+ (unspec:VI4_AVX
+ [(match_operand:VI4_AVX 1 "register_operand" "0")
+ (match_operand:VI4_AVX 2 "register_operand" "x")
+ (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")]
+ VPDOTPROD))]
+ "TARGET_AVXVNNIINT8"
+ "vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "prefix" "vex")])
@@ -7065,6 +7065,11 @@ Enable/disable the generation of the AVXVNNI instructions.
@cindex @code{target("avxifma")} function attribute, x86
Enable/disable the generation of the AVXIFMA instructions.
+@item avxvnniint8
+@itemx no-avxvnniint8
+@cindex @code{target("avxvnniint8")} function attribute, x86
+Enable/disable the generation of the AVXVNNIINT8 instructions.
+
@item cld
@itemx no-cld
@cindex @code{target("cld")} function attribute, x86
@@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options.
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
--mavx512fp16 -mavxifma @gol
+-mavx512fp16 -mavxifma -mavxvnniint8 @gol
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mkl -mwidekl @gol
@@ -32896,6 +32896,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@itemx -mavxifma
@opindex mavxifma
+@need 200
+@itemx -mavxvnniint8
+@opindex mavxvnniint8
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
@@ -32906,8 +32909,8 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
-AVXIFMA or CLDEMOTE extended instruction sets. Each has a corresponding
-@option{-mno-} option to disable use of these instructions.
+AVXIFMA, AVXVNNIINT8 or CLDEMOTE extended instruction sets. Each has a
+corresponding @option{-mno-} option to disable use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and
@@ -2493,6 +2493,9 @@ Target supports the execution of @code{avx512vp2intersect} instructions.
@item avxifma
Target supports the execution of @code{avxifma} instructions.
+@item avxvnniint8
+Target supports the execution of @code{avxvnniint8} instructions.
+
@item amx_tile
Target supports the execution of @code{amx-tile} instructions.
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
@@ -25,6 +25,9 @@ main ()
&& avx_os_support ()
#ifdef AVXIFMA
&& __builtin_cpu_supports ("avxifma")
+#endif
+#ifdef AVXVNNIINT8
+ && __builtin_cpu_supports ("avxvnniint8")
#endif
)
{
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-mavxvnniint8 -O2" } */
+/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+volatile __mmask8 m;
+
+void extern
+avxvnniint8_test (void)
+{
+ x = _mm256_dpbssd_epi32 (x, y, z);
+ x_ = _mm_dpbssd_epi32 (x_, y_, z_);
+
+ x = _mm256_dpbssds_epi32 (x, y, z);
+ x_ = _mm_dpbssds_epi32 (x_, y_, z_);
+
+ x = _mm256_dpbsud_epi32 (x, y, z);
+ x_ = _mm_dpbsud_epi32 (x_, y_, z_);
+
+ x = _mm256_dpbsuds_epi32 (x, y, z);
+ x_ = _mm_dpbsuds_epi32 (x_, y_, z_);
+
+ x = _mm256_dpbuud_epi32 (x, y, z);
+ x_ = _mm_dpbuud_epi32 (x_, y_, z_);
+
+ x = _mm256_dpbuuds_epi32 (x, y, z);
+ x_ = _mm_dpbuuds_epi32 (x_, y_, z_);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnniint8" } */
+/* { dg-require-effective-target avxvnniint8 } */
+#define AVXVNNIINT8
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (int *r, int *dst, char *s1, char *s2, int size)
+{
+ short tempres[32];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = (short) s1[i] * (short) s2[i];
+ }
+ for (int i = 0; i < size / 4; i++) {
+ long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ + tempres[i * 4 + 2] + tempres[i * 4 + 3];
+ r[i] = test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_d res_256;
+ union256i_b src2_256;
+ union256i_b src1_256;
+ int res_ref_256[8];
+
+ for (i = 0; i < 32; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_256.a[i] = 10 + 3 * i + sign;
+ src2_256.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
+ res_256.x = _mm256_dpbssd_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_d (res_256, res_ref_256))
+ abort ();
+
+ union128i_d res_128;
+ union128i_b src2_128;
+ union128i_b src1_128;
+ int res_ref_128[4];
+
+ for (i = 0; i < 16; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_128.a[i] = 10 + 3 * i * i + sign;
+ src2_128.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
+ res_128.x = _mm_dpbssd_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_d (res_128, res_ref_128))
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnniint8" } */
+/* { dg-require-effective-target avxvnniint8 } */
+#define AVXVNNIINT8
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (int *r, int *dst, char *s1, char *s2, int size)
+{
+ short tempres[32];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = (short) s1[i] * (short) s2[i];
+ }
+ for (int i = 0; i < size / 4; i++) {
+ long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ + tempres[i * 4 + 2] + tempres[i * 4 + 3];
+ r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_d res_256;
+ union256i_b src2_256;
+ union256i_b src1_256;
+ int res_ref_256[8];
+
+ for (i = 0; i < 32; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_256.a[i] = 10 + 3 * i + sign;
+ src2_256.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
+ res_256.x = _mm256_dpbssds_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_d (res_256, res_ref_256))
+ abort ();
+
+ union128i_d res_128;
+ union128i_b src2_128;
+ union128i_b src1_128;
+ int res_ref_128[4];
+
+ for (i = 0; i < 16; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_128.a[i] = 10 + 3 * i * i + sign;
+ src2_128.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
+ res_128.x = _mm_dpbssds_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_d (res_128, res_ref_128))
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnniint8" } */
+/* { dg-require-effective-target avxvnniint8 } */
+#define AVXVNNIINT8
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (int *r, int *dst, char *s1, unsigned char *s2, int size)
+{
+ short tempres[32];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = (short) s1[i] * (unsigned short) s2[i];
+ }
+ for (int i = 0; i < size / 4; i++) {
+ long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ + tempres[i * 4 + 2] + tempres[i * 4 + 3];
+ r[i] = test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_d res_256;
+ union256i_b src1_256;
+ union256i_ub src2_256;
+ int res_ref_256[8];
+
+ for (i = 0; i < 32; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_256.a[i] = 10 + 3 * i + sign;
+ src2_256.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
+ res_256.x = _mm256_dpbsud_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_d (res_256, res_ref_256))
+ abort ();
+
+ union128i_d res_128;
+ union128i_b src1_128;
+ union128i_ub src2_128;
+ int res_ref_128[4];
+
+ for (i = 0; i < 16; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_128.a[i] = 10 + 3 * i * i + sign;
+ src2_128.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
+ res_128.x = _mm_dpbsud_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_d (res_128, res_ref_128))
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnniint8" } */
+/* { dg-require-effective-target avxvnniint8 } */
+#define AVXVNNIINT8
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (int *r, int *dst, char *s1, unsigned char *s2, int size)
+{
+ short tempres[32];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = (short) s1[i] * (unsigned short) s2[i];
+ }
+ for (int i = 0; i < size / 4; i++) {
+ long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ + tempres[i * 4 + 2] + tempres[i * 4 + 3];
+ r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_d res_256;
+ union256i_b src1_256;
+ union256i_ub src2_256;
+ int res_ref_256[8];
+
+ for (i = 0; i < 32; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_256.a[i] = 10 + 3 * i + sign;
+ src2_256.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
+ res_256.x = _mm256_dpbsuds_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_d (res_256, res_ref_256))
+ abort ();
+
+ union128i_d res_128;
+ union128i_b src1_128;
+ union128i_ub src2_128;
+ int res_ref_128[4];
+
+ for (i = 0; i < 16; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_128.a[i] = 10 + 3 * i * i + sign;
+ src2_128.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
+ res_128.x = _mm_dpbsuds_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_d (res_128, res_ref_128))
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnniint8" } */
+/* { dg-require-effective-target avxvnniint8 } */
+#define AVXVNNIINT8
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned char *s2, int size)
+{
+ unsigned short tempres[32];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
+ }
+ for (int i = 0; i < size / 4; i++) {
+ unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ + tempres[i * 4 + 2] + tempres[i * 4 + 3];
+ r[i] = test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_ud res_256;
+ union256i_ub src2_256;
+ union256i_ub src1_256;
+ unsigned int res_ref_256[8];
+
+ for (i = 0; i < 32; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_256.a[i] = 10 + 3 * i + sign;
+ src2_256.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
+ res_256.x = _mm256_dpbuud_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_ud (res_256, res_ref_256))
+ abort ();
+
+ union128i_ud res_128;
+ union128i_ub src2_128;
+ union128i_ub src1_128;
+ unsigned int res_ref_128[4];
+
+ for (i = 0; i < 16; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_128.a[i] = 10 + 3 * i * i + sign;
+ src2_128.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
+ res_128.x = _mm_dpbuud_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_ud (res_128, res_ref_128))
+ abort ();
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnniint8" } */
+/* { dg-require-effective-target avxvnniint8 } */
+#define AVXVNNIINT8
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned char *s2, int size)
+{
+ unsigned short tempres[32];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
+ }
+ for (int i = 0; i < size / 4; i++) {
+ unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ + tempres[i * 4 + 2] + tempres[i * 4 + 3];
+ r[i] = test > 0xFFFFFFFF ? 0xFFFFFFFF : test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_ud res_256;
+ union256i_ub src2_256;
+ union256i_ub src1_256;
+ unsigned int res_ref_256[8];
+
+ for (i = 0; i < 32; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_256.a[i] = 10 + 3 * i + sign;
+ src2_256.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
+ res_256.x = _mm256_dpbuuds_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_ud (res_256, res_ref_256))
+ abort ();
+
+ union128i_ud res_128;
+ union128i_ub src2_128;
+ union128i_ub src1_128;
+ unsigned int res_ref_128[4];
+
+ for (i = 0; i < 16; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_128.a[i] = 10 + 3 * i * i + sign;
+ src2_128.a[i] = sign * 10 * i * i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
+ res_128.x = _mm_dpbuuds_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_ud (res_128, res_ref_128))
+ abort ();
+}
@@ -81,6 +81,7 @@ extern void test_widekl (void) __attribute__((__target__("widekl")));
extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
extern void test_avxifma (void) __attribute__((__target__("avxifma")));
+extern void test_avxvnniint8 (void) __attribute__((__target__("avxvnniint8")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
@@ -163,6 +164,7 @@ extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
extern void test_no_avxifma (void) __attribute__((__target__("no-avxifma")));
+extern void test_no_avxvnniint8 (void) __attribute__((__target__("no-avxvnniint8")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
@@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8" } */
#include <x86intrin.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -103,7 +103,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
#endif
/* Following intrinsics require immediate arguments. They
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
@@ -843,6 +843,6 @@
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
#include <x86intrin.h>
@@ -9518,6 +9518,18 @@ proc check_effective_target_avxifma { } {
} "-O0 -mavxifma" ]
}
+# Return 1 if avxvnniint8 instructions can be compiled.
+proc check_effective_target_avxvnniint8 { } {
+ return [check_no_compiler_messages avxvnniint8 object {
+ typedef int __v8si __attribute__ ((__vector_size__ (32)));
+ __v8si
+ _mm256_dpbssd_epi32 (__v8si __A, __v8si __B, __v8si __C)
+ {
+ return __builtin_ia32_vpdpbssd256 (__A, __B, __C);
+ }
+ } "-O0 -mavxvnniint8" ]
+}
+
# Return 1 if sse instructions can be compiled.
proc check_effective_target_sse { } {
return [check_no_compiler_messages sse object {