[4/6] Support Intel AVX-NE-CONVERT

Message ID 20221014075445.7938-5-haochen.jiang@intel.com
State Accepted
Headers
Series Add Intel Sierra Forest Instructions |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Jiang, Haochen Oct. 14, 2022, 7:54 a.m. UTC
  From: Kong Lingling <lingling.kong@intel.com>

gcc/ChangeLog:

	* common/config/i386/i386-common.cc
	(OPTION_MASK_ISA2_AVXNECONVERT_SET,
	OPTION_MASK_ISA2_AVXNECONVERT_UNSET): New.
	(ix86_handle_option): Handle -mavxneconvert, unset
	avxneconvert when avx2 is disabled.
	* common/config/i386/i386-cpuinfo.h (processor_types): Add
	FEATURE_AVXNECONVERT.
	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
	avxneconvert.
	* common/config/i386/cpuinfo.h (get_available_features):
	Detect avxneconvert.
	* config.gcc: Add avxneconvertintrin.h
	* config/i386/avxneconvertintrin.h: New.
	* config/i386/cpuid.h (bit_AVXNECONVERT): New.
	* config/i386/i386-builtin-types.def: Add
	DEF_POINTER_TYPE (PCV8HF, V8HF, CONST),
	DEF_POINTER_TYPE (PCV16HF, V16HF, CONST),
	DEF_FUNCTION_TYPE (V4SF, PCSHORT),
	DEF_FUNCTION_TYPE (V8SF, PCSHORT),
	DEF_FUNCTION_TYPE (V4SF, PCV8BF),
	DEF_FUNCTION_TYPE (V4SF, PCV8BF),
	DEF_FUNCTION_TYPE (V8SF, PCV16HF),
	DEF_FUNCTION_TYPE (V8SF, PCV16BF).
	* config/i386/i386-builtin.def: Add new builtins.
	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
	__AVXNECONVERT__.
	* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
	Handle V4SF_FTYPE_PCSHORT,V8SF_FTYPE_PCSHORT,V4SF_FTYPE_PCV8BF,
	V4SF_FTYPE_PCV8HF,V8SF_FTYPE_PCV16BF,V8SF_FTYPE_PCV16HF.
	* config/i386/i386-isa.def : Add DEF_PTA(AVXNECONVERT) New.
	* config/i386/i386-options.cc (isa2_opts): Add -mavxneconvert.
	(ix86_valid_target_attribute_inner_p): Handle avxneconvert.
	* config/i386/i386.opt: Add option -mavxneconvert.
	* config/i386/immintrin.h: Inculde avxneconvertintrin.h.
	* config/i386/sse.md: (avx_vbcstne<vbcstnetype>2ps_<mode>),
	(avx_vcvtne<vcvtnebf16type>2ps_<mode>),
	(avx_vcvtne<vcvtnephtype>2ps_<mode>),
	(avx_vcvtneps2bf16_<mode>): New define_insn
	(avx512f_cvtneps2bf16_<mode>):Ditto.
	(avx512f_cvtneps2bf16_<mode>_mask):Ditto.
	* doc/invoke.texi: Document -mavxneconvert.
	* doc/extend.texi: Document avxneconvert.
	* doc/sourcebuild.texi: Document target avxneconvert.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-check.h: Add avxneconvert check.
	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
	* gcc.target/i386/sse-12.c: Add -mavxneconvert.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-14.c: Ditto.
	* gcc.target/i386/sse-22.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* g++.dg/other/i386-2.C: Ditto.
	* g++.dg/other/i386-3.C: Ditto.
	* lib/target-supports.exp:add check_effective_target_avxneconvert.
	* gcc.target/i386/avx-ne-convert-1.c: New test.
	* gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c: Ditto.
	* gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c: Ditto.
	* gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c: Ditto.
	* gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c: Ditto.
	* gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c: Ditto.
	* gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c: Ditto.
	* gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c: Ditto.
	* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: Rename..
	* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1a.c: To this.
	* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c: New test.
---
 gcc/common/config/i386/cpuinfo.h              |   2 +
 gcc/common/config/i386/i386-common.cc         |  21 ++-
 gcc/common/config/i386/i386-cpuinfo.h         |   1 +
 gcc/common/config/i386/i386-isas.h            |   2 +
 gcc/config.gcc                                |   2 +-
 gcc/config/i386/avxneconvertintrin.h          | 140 ++++++++++++++++++
 gcc/config/i386/cpuid.h                       |   1 +
 gcc/config/i386/i386-builtin-types.def        |  17 +++
 gcc/config/i386/i386-builtin.def              |  18 +++
 gcc/config/i386/i386-c.cc                     |   2 +
 gcc/config/i386/i386-expand.cc                |   8 +
 gcc/config/i386/i386-isa.def                  |   1 +
 gcc/config/i386/i386-options.cc               |   4 +-
 gcc/config/i386/i386.opt                      |   5 +
 gcc/config/i386/immintrin.h                   |   4 +
 gcc/config/i386/sse.md                        | 100 ++++++++++++-
 gcc/doc/extend.texi                           |   5 +
 gcc/doc/invoke.texi                           |   9 +-
 gcc/doc/sourcebuild.texi                      |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C           |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C           |   2 +-
 gcc/testsuite/gcc.target/i386/avx-check.h     |   3 +
 .../gcc.target/i386/avx-ne-convert-1.c        |  45 ++++++
 .../i386/avx-ne-convert-vbcstnebf162ps-2.c    |  54 +++++++
 .../i386/avx-ne-convert-vbcstnesh2ps-2.c      |  42 ++++++
 .../i386/avx-ne-convert-vcvtneebf162ps-2.c    |  73 +++++++++
 .../i386/avx-ne-convert-vcvtneeph2ps-2.c      |  66 +++++++++
 .../i386/avx-ne-convert-vcvtneobf162ps-2.c    |  75 ++++++++++
 .../i386/avx-ne-convert-vcvtneoph2ps-2.c      |  66 +++++++++
 .../i386/avx-ne-convert-vcvtneps2bf16-2.c     |  58 ++++++++
 ...16-1.c => avx512bf16vl-vcvtneps2bf16-1a.c} |   0
 .../i386/avx512bf16vl-vcvtneps2bf16-1b.c      |  27 ++++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-12.c        |   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c        |   2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c        |   2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c        |   4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c        |   2 +-
 gcc/testsuite/lib/target-supports.exp         |  12 ++
 39 files changed, 868 insertions(+), 16 deletions(-)
 create mode 100644 gcc/config/i386/avxneconvertintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c
 rename gcc/testsuite/gcc.target/i386/{avx512bf16vl-vcvtneps2bf16-1.c => avx512bf16vl-vcvtneps2bf16-1a.c} (100%)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c
  

Comments

Hongtao Liu Oct. 17, 2022, 5:46 a.m. UTC | #1
On Fri, Oct 14, 2022 at 3:58 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> From: Kong Lingling <lingling.kong@intel.com>
>
> gcc/ChangeLog:
>
>         * common/config/i386/i386-common.cc
>         (OPTION_MASK_ISA2_AVXNECONVERT_SET,
>         OPTION_MASK_ISA2_AVXNECONVERT_UNSET): New.
>         (ix86_handle_option): Handle -mavxneconvert, unset
>         avxneconvert when avx2 is disabled.
>         * common/config/i386/i386-cpuinfo.h (processor_types): Add
>         FEATURE_AVXNECONVERT.
>         * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
>         avxneconvert.
>         * common/config/i386/cpuinfo.h (get_available_features):
>         Detect avxneconvert.
>         * config.gcc: Add avxneconvertintrin.h
>         * config/i386/avxneconvertintrin.h: New.
>         * config/i386/cpuid.h (bit_AVXNECONVERT): New.
>         * config/i386/i386-builtin-types.def: Add
>         DEF_POINTER_TYPE (PCV8HF, V8HF, CONST),
>         DEF_POINTER_TYPE (PCV16HF, V16HF, CONST),
>         DEF_FUNCTION_TYPE (V4SF, PCSHORT),
>         DEF_FUNCTION_TYPE (V8SF, PCSHORT),
>         DEF_FUNCTION_TYPE (V4SF, PCV8BF),
>         DEF_FUNCTION_TYPE (V4SF, PCV8BF),
>         DEF_FUNCTION_TYPE (V8SF, PCV16HF),
>         DEF_FUNCTION_TYPE (V8SF, PCV16BF).
>         * config/i386/i386-builtin.def: Add new builtins.
>         * config/i386/i386-c.cc (ix86_target_macros_internal): Define
>         __AVXNECONVERT__.
>         * config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
>         Handle V4SF_FTYPE_PCSHORT,V8SF_FTYPE_PCSHORT,V4SF_FTYPE_PCV8BF,
>         V4SF_FTYPE_PCV8HF,V8SF_FTYPE_PCV16BF,V8SF_FTYPE_PCV16HF.
>         * config/i386/i386-isa.def : Add DEF_PTA(AVXNECONVERT) New.
>         * config/i386/i386-options.cc (isa2_opts): Add -mavxneconvert.
>         (ix86_valid_target_attribute_inner_p): Handle avxneconvert.
>         * config/i386/i386.opt: Add option -mavxneconvert.
>         * config/i386/immintrin.h: Inculde avxneconvertintrin.h.
>         * config/i386/sse.md: (avx_vbcstne<vbcstnetype>2ps_<mode>),
>         (avx_vcvtne<vcvtnebf16type>2ps_<mode>),
>         (avx_vcvtne<vcvtnephtype>2ps_<mode>),
>         (avx_vcvtneps2bf16_<mode>): New define_insn
>         (avx512f_cvtneps2bf16_<mode>):Ditto.
>         (avx512f_cvtneps2bf16_<mode>_mask):Ditto.
>         * doc/invoke.texi: Document -mavxneconvert.
>         * doc/extend.texi: Document avxneconvert.
>         * doc/sourcebuild.texi: Document target avxneconvert.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/avx-check.h: Add avxneconvert check.
>         * gcc.target/i386/funcspec-56.inc: Add new target attribute.
>         * gcc.target/i386/sse-12.c: Add -mavxneconvert.
>         * gcc.target/i386/sse-13.c: Ditto.
>         * gcc.target/i386/sse-14.c: Ditto.
>         * gcc.target/i386/sse-22.c: Ditto.
>         * gcc.target/i386/sse-23.c: Ditto.
>         * g++.dg/other/i386-2.C: Ditto.
>         * g++.dg/other/i386-3.C: Ditto.
>         * lib/target-supports.exp:add check_effective_target_avxneconvert.
>         * gcc.target/i386/avx-ne-convert-1.c: New test.
>         * gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c: Ditto.
>         * gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c: Ditto.
>         * gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c: Ditto.
>         * gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c: Ditto.
>         * gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c: Ditto.
>         * gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c: Ditto.
>         * gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c: Ditto.
>         * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: Rename..
>         * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1a.c: To this.
>         * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c: New test.
> ---
>  gcc/common/config/i386/cpuinfo.h              |   2 +
>  gcc/common/config/i386/i386-common.cc         |  21 ++-
>  gcc/common/config/i386/i386-cpuinfo.h         |   1 +
>  gcc/common/config/i386/i386-isas.h            |   2 +
>  gcc/config.gcc                                |   2 +-
>  gcc/config/i386/avxneconvertintrin.h          | 140 ++++++++++++++++++
>  gcc/config/i386/cpuid.h                       |   1 +
>  gcc/config/i386/i386-builtin-types.def        |  17 +++
>  gcc/config/i386/i386-builtin.def              |  18 +++
>  gcc/config/i386/i386-c.cc                     |   2 +
>  gcc/config/i386/i386-expand.cc                |   8 +
>  gcc/config/i386/i386-isa.def                  |   1 +
>  gcc/config/i386/i386-options.cc               |   4 +-
>  gcc/config/i386/i386.opt                      |   5 +
>  gcc/config/i386/immintrin.h                   |   4 +
>  gcc/config/i386/sse.md                        | 100 ++++++++++++-
>  gcc/doc/extend.texi                           |   5 +
>  gcc/doc/invoke.texi                           |   9 +-
>  gcc/doc/sourcebuild.texi                      |   3 +
>  gcc/testsuite/g++.dg/other/i386-2.C           |   2 +-
>  gcc/testsuite/g++.dg/other/i386-3.C           |   2 +-
>  gcc/testsuite/gcc.target/i386/avx-check.h     |   3 +
>  .../gcc.target/i386/avx-ne-convert-1.c        |  45 ++++++
>  .../i386/avx-ne-convert-vbcstnebf162ps-2.c    |  54 +++++++
>  .../i386/avx-ne-convert-vbcstnesh2ps-2.c      |  42 ++++++
>  .../i386/avx-ne-convert-vcvtneebf162ps-2.c    |  73 +++++++++
>  .../i386/avx-ne-convert-vcvtneeph2ps-2.c      |  66 +++++++++
>  .../i386/avx-ne-convert-vcvtneobf162ps-2.c    |  75 ++++++++++
>  .../i386/avx-ne-convert-vcvtneoph2ps-2.c      |  66 +++++++++
>  .../i386/avx-ne-convert-vcvtneps2bf16-2.c     |  58 ++++++++
>  ...16-1.c => avx512bf16vl-vcvtneps2bf16-1a.c} |   0
>  .../i386/avx512bf16vl-vcvtneps2bf16-1b.c      |  27 ++++
>  gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
>  gcc/testsuite/gcc.target/i386/sse-12.c        |   2 +-
>  gcc/testsuite/gcc.target/i386/sse-13.c        |   2 +-
>  gcc/testsuite/gcc.target/i386/sse-14.c        |   2 +-
>  gcc/testsuite/gcc.target/i386/sse-22.c        |   4 +-
>  gcc/testsuite/gcc.target/i386/sse-23.c        |   2 +-
>  gcc/testsuite/lib/target-supports.exp         |  12 ++
>  39 files changed, 868 insertions(+), 16 deletions(-)
>  create mode 100644 gcc/config/i386/avxneconvertintrin.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c
>  rename gcc/testsuite/gcc.target/i386/{avx512bf16vl-vcvtneps2bf16-1.c => avx512bf16vl-vcvtneps2bf16-1a.c} (100%)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c
>
> diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
> index bed88003f8e..e9fd586704d 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -797,6 +797,8 @@ get_available_features (struct __processor_model *cpu_model,
>             set_feature (FEATURE_AVXIFMA);
>           if (edx & bit_AVXVNNIINT8)
>             set_feature (FEATURE_AVXVNNIINT8);
> +         if (edx & bit_AVXNECONVERT)
> +           set_feature (FEATURE_AVXNECONVERT);
>         }
>        if (avx512_usable)
>         {
> diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
> index 6a2a7e3d25a..f9c906f75cf 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -109,6 +109,7 @@ along with GCC; see the file COPYING3.  If not see
>  #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
>  #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
>  #define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8
> +#define OPTION_MASK_ISA2_AVXNECONVERT_SET OPTION_MASK_ISA2_AVXNECONVERT
>
>  /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
>     as -msse4.2.  */
> @@ -215,7 +216,8 @@ along with GCC; see the file COPYING3.  If not see
>    (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
>  #define OPTION_MASK_ISA2_AVX2_UNSET \
>    (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
> -   | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
> +   | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVXNECONVERT_UNSET \
> +   | OPTION_MASK_ISA2_AVX512F_UNSET)
>  #define OPTION_MASK_ISA_AVX512F_UNSET \
>    (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
>     | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
> @@ -280,6 +282,7 @@ along with GCC; see the file COPYING3.  If not see
>    (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
>  #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
>  #define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET OPTION_MASK_ISA2_AVXVNNIINT8
> +#define OPTION_MASK_ISA2_AVXNECONVERT_UNSET OPTION_MASK_ISA2_AVXNECONVERT
>
>  /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
>     as -mno-sse4.1. */
> @@ -1162,6 +1165,22 @@ ix86_handle_option (struct gcc_options *opts,
>         }
>        return true;
>
> +    case OPT_mavxneconvert:
> +      if (value)
> +       {
> +         opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXNECONVERT_SET;
> +         opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXNECONVERT_SET;
> +         opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
> +         opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
> +       }
> +      else
> +       {
> +         opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVXNECONVERT_UNSET;
> +         opts->x_ix86_isa_flags2_explicit
> +           |= OPTION_MASK_ISA2_AVXNECONVERT_UNSET;
> +       }
> +      return true;
> +
>      case OPT_mfma:
>        if (value)
>         {
> diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
> index 9a6b92fab79..2d3fbfc817a 100644
> --- a/gcc/common/config/i386/i386-cpuinfo.h
> +++ b/gcc/common/config/i386/i386-cpuinfo.h
> @@ -242,6 +242,7 @@ enum processor_features
>    FEATURE_X86_64_V4,
>    FEATURE_AVXIFMA,
>    FEATURE_AVXVNNIINT8,
> +  FEATURE_AVXNECONVERT,
>    CPU_FEATURE_MAX
>  };
>
> diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
> index 8c1f351056c..bceaee589ee 100644
> --- a/gcc/common/config/i386/i386-isas.h
> +++ b/gcc/common/config/i386/i386-isas.h
> @@ -178,4 +178,6 @@ ISA_NAMES_TABLE_START
>    ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma")
>    ISA_NAMES_TABLE_ENTRY("avxvnniint8", FEATURE_AVXVNNIINT8,
>                         P_NONE, "-mavxvnniint8")
> +  ISA_NAMES_TABLE_ENTRY("avxneconvert", FEATURE_AVXNECONVERT,
> +                       P_NONE, "-mavxneconvert")
>  ISA_NAMES_TABLE_END
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 4df78238910..840b62aee61 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -422,7 +422,7 @@ i[34567]86-*-* | x86_64-*-*)
>                        amxbf16intrin.h x86gprintrin.h uintrintrin.h
>                        hresetintrin.h keylockerintrin.h avxvnniintrin.h
>                        mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
> -                      avxifmaintrin.h avxvnniint8intrin.h"
> +                      avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h"
>         ;;
>  ia64-*-*)
>         extra_headers=ia64intrin.h
> diff --git a/gcc/config/i386/avxneconvertintrin.h b/gcc/config/i386/avxneconvertintrin.h
> new file mode 100644
> index 00000000000..30199384725
> --- /dev/null
> +++ b/gcc/config/i386/avxneconvertintrin.h
> @@ -0,0 +1,140 @@
> +/* Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   GCC is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef _IMMINTRIN_H_INCLUDED
> +#error "Never use <avxneconvertintrin.h> directly; include <immintrin.h> instead."
> +#endif
> +
> +#ifndef _AVXNECONVERTINTRIN_H_INCLUDED
> +#define _AVXNECONVERTINTRIN_H_INCLUDED
> +
> +#ifndef __AVXNECONVERT__
> +#pragma GCC push_options
> +#pragma GCC target ("avxneconvert")
> +#define __DISABLE_AVXNECONVERT__
> +#endif /* __AVXNECONVERT__ */
> +
> +extern __inline __m128
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_bcstnebf16_ps (const void *__P)
> +{
> +  return (__m128) __builtin_ia32_vbcstnebf162ps128 ((const short *) __P);
> +}
> +
> +extern __inline __m256
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_bcstnebf16_ps (const void *__P)
> +{
> +  return (__m256) __builtin_ia32_vbcstnebf162ps256 ((const short *) __P);
> +}
> +
> +extern __inline __m128
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_bcstnesh_ps (const void *__P)
> +{
> +  return (__m128) __builtin_ia32_vbcstnesh2ps128 ((const short *) __P);
> +}
> +
> +extern __inline __m256
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_bcstnesh_ps (const void *__P)
> +{
> +  return (__m256) __builtin_ia32_vbcstnesh2ps256 ((const short *) __P);
> +}
> +
> +extern __inline __m128
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtneebf16_ps (const __m128bf16 *__A)
> +{
> +  return (__m128) __builtin_ia32_vcvtneebf162ps128 ((const __v8bf *) __A);
> +}
> +
> +extern __inline __m256
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cvtneebf16_ps (const __m256bf16 *__A)
> +{
> +  return (__m256) __builtin_ia32_vcvtneebf162ps256 ((const __v16bf *) __A);
> +}
> +
> +extern __inline __m128
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtneeph_ps (const __m128h *__A)
> +{
> +  return (__m128) __builtin_ia32_vcvtneeph2ps128 ((const __v8hf *) __A);
> +}
> +
> +extern __inline __m256
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cvtneeph_ps (const __m256h *__A)
> +{
> +  return (__m256) __builtin_ia32_vcvtneeph2ps256 ((const __v16hf *) __A);
> +}
> +
> +extern __inline __m128
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtneobf16_ps (const __m128bf16 *__A)
> +{
> +  return (__m128) __builtin_ia32_vcvtneobf162ps128 ((const __v8bf *) __A);
> +}
> +
> +extern __inline __m256
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cvtneobf16_ps (const __m256bf16 *__A)
> +{
> +  return (__m256) __builtin_ia32_vcvtneobf162ps256 ((const __v16bf *) __A);
> +}
> +
> +extern __inline __m128
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtneoph_ps (const __m128h *__A)
> +{
> +  return (__m128) __builtin_ia32_vcvtneoph2ps128 ((const __v8hf *) __A);
> +}
> +
> +extern __inline __m256
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cvtneoph_ps (const __m256h *__A)
> +{
> +  return (__m256) __builtin_ia32_vcvtneoph2ps256 ((const __v16hf *) __A);
> +}
> +
> +extern __inline __m128bf16
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtneps_avx_pbh (__m128 __A)
> +{
> +  return (__m128bf16) __builtin_ia32_vcvtneps2bf16128 (__A);
> +}
> +
> +extern __inline __m128bf16
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cvtneps_avx_pbh (__m256 __A)
> +{
> +  return (__m128bf16) __builtin_ia32_vcvtneps2bf16256 (__A);
> +}
> +
> +#ifdef __DISABLE_AVXNECONVERT__
> +#undef __DISABLE_AVXNECONVERT__
> +#pragma GCC pop_options
> +#endif /* __DISABLE_AVXNECONVERT__ */
> +
> +#endif /* _AVXNECONVERTINTRIN_H_INCLUDED */
> diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
> index f5fad22149a..18bbc0cb7be 100644
> --- a/gcc/config/i386/cpuid.h
> +++ b/gcc/config/i386/cpuid.h
> @@ -50,6 +50,7 @@
>
>  /* %edx */
>  #define bit_AVXVNNIINT8 (1 << 4)
> +#define bit_AVXNECONVERT (1 << 5)
>  #define bit_CMPXCHG8B  (1 << 8)
>  #define bit_CMOV       (1 << 15)
>  #define bit_MMX                (1 << 23)
> diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
> index 63a360b0f8b..ebf6e5b4ad8 100644
> --- a/gcc/config/i386/i386-builtin-types.def
> +++ b/gcc/config/i386/i386-builtin-types.def
> @@ -87,6 +87,7 @@ DEF_VECTOR_TYPE (V8QI, QI)
>  DEF_VECTOR_TYPE (V2DF, DOUBLE)
>  DEF_VECTOR_TYPE (V4SF, FLOAT)
>  DEF_VECTOR_TYPE (V8HF, FLOAT16)
> +DEF_VECTOR_TYPE (V8BF, BFLOAT16)
>  DEF_VECTOR_TYPE (V2DI, DI)
>  DEF_VECTOR_TYPE (V4SI, SI)
>  DEF_VECTOR_TYPE (V8HI, HI)
> @@ -100,6 +101,7 @@ DEF_VECTOR_TYPE (V16UQI, UQI, V16QI)
>  DEF_VECTOR_TYPE (V4DF, DOUBLE)
>  DEF_VECTOR_TYPE (V8SF, FLOAT)
>  DEF_VECTOR_TYPE (V16HF, FLOAT16)
> +DEF_VECTOR_TYPE (V16BF, BFLOAT16)
>  DEF_VECTOR_TYPE (V4DI, DI)
>  DEF_VECTOR_TYPE (V8SI, SI)
>  DEF_VECTOR_TYPE (V16HI, HI)
> @@ -111,6 +113,7 @@ DEF_VECTOR_TYPE (V16UHI, UHI, V16HI)
>  # AVX512F vectors
>  DEF_VECTOR_TYPE (V32SF, FLOAT)
>  DEF_VECTOR_TYPE (V32HF, FLOAT16)
> +DEF_VECTOR_TYPE (V32BF, BFLOAT16)
>  DEF_VECTOR_TYPE (V16SF, FLOAT)
>  DEF_VECTOR_TYPE (V8DF, DOUBLE)
>  DEF_VECTOR_TYPE (V8DI, DI)
> @@ -179,6 +182,10 @@ DEF_POINTER_TYPE (PCV4DF, V4DF, CONST)
>  DEF_POINTER_TYPE (PCV4SF, V4SF, CONST)
>  DEF_POINTER_TYPE (PCV8DF, V8DF, CONST)
>  DEF_POINTER_TYPE (PCV8SF, V8SF, CONST)
> +DEF_POINTER_TYPE (PCV8HF, V8HF, CONST)
> +DEF_POINTER_TYPE (PCV8BF, V8BF, CONST)
> +DEF_POINTER_TYPE (PCV16HF, V16HF, CONST)
> +DEF_POINTER_TYPE (PCV16BF, V16BF, CONST)
>  DEF_POINTER_TYPE (PCV16SF, V16SF, CONST)
>
>  DEF_POINTER_TYPE (PCV2DI, V2DI, CONST)
> @@ -254,12 +261,14 @@ DEF_FUNCTION_TYPE (V4DF, V4SI)
>  DEF_FUNCTION_TYPE (V8DF, V8DF)
>  DEF_FUNCTION_TYPE (V4HI, V4HI)
>  DEF_FUNCTION_TYPE (V4SF, PCFLOAT)
> +DEF_FUNCTION_TYPE (V4SF, PCSHORT)
>  DEF_FUNCTION_TYPE (V4SF, V2DF)
>  DEF_FUNCTION_TYPE (V4SF, V2DF, V4SF, UQI)
>  DEF_FUNCTION_TYPE (V4SF, V4DF)
>  DEF_FUNCTION_TYPE (V4SF, V4DF, V4SF, UQI)
>  DEF_FUNCTION_TYPE (V4SF, V4SF)
>  DEF_FUNCTION_TYPE (V4SF, PCV4SF)
> +DEF_FUNCTION_TYPE (V4SF, PCV8HF)
>  DEF_FUNCTION_TYPE (V4SF, V4SI)
>  DEF_FUNCTION_TYPE (V4SF, V8SF)
>  DEF_FUNCTION_TYPE (V4SF, V8HI)
> @@ -275,8 +284,10 @@ DEF_FUNCTION_TYPE (V8HI, V16QI)
>  DEF_FUNCTION_TYPE (V8HI, V8HI)
>  DEF_FUNCTION_TYPE (V8QI, V8QI)
>  DEF_FUNCTION_TYPE (V8SF, PCFLOAT)
> +DEF_FUNCTION_TYPE (V8SF, PCSHORT)
>  DEF_FUNCTION_TYPE (V8SF, PCV4SF)
>  DEF_FUNCTION_TYPE (V8SF, PCV8SF)
> +DEF_FUNCTION_TYPE (V8SF, PCV16HF)
>  DEF_FUNCTION_TYPE (V8SF, V4SF)
>  DEF_FUNCTION_TYPE (V8SF, V8SF)
>  DEF_FUNCTION_TYPE (V8SF, V8SI)
> @@ -1389,3 +1400,9 @@ DEF_FUNCTION_TYPE (V32HF, V32HF)
>  DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND)
>  DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND)
>  DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND)
> +
> +# AVXNECONVERT builtins
> +DEF_FUNCTION_TYPE (V8BF, V8SF)
> +DEF_FUNCTION_TYPE (V8BF, V4SF)
> +DEF_FUNCTION_TYPE (V4SF, PCV8BF)
> +DEF_FUNCTION_TYPE (V8SF, PCV16BF)
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index e6edae5728b..a429577180c 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -274,6 +274,20 @@ BDESC (OPTION_MASK_ISA_RTM, 0, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BU
>  BDESC (OPTION_MASK_ISA_RTM, 0, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID)
>  BDESC (OPTION_MASK_ISA_RTM, 0, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID)
>
> +/* AVX-NE-CONVERT */
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vbcstnebf162ps_v4sf, "__builtin_ia32_vbcstnebf162ps128", IX86_BUILTIN_VBCSTNEBF162PS128, UNKNOWN, (int) V4SF_FTYPE_PCSHORT)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vbcstnebf162ps_v8sf, "__builtin_ia32_vbcstnebf162ps256", IX86_BUILTIN_VBCSTNEBF162PS256, UNKNOWN, (int) V8SF_FTYPE_PCSHORT)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vbcstnesh2ps_v4sf, "__builtin_ia32_vbcstnesh2ps128", IX86_BUILTIN_VBCSTNESH2PS128, UNKNOWN, (int) V4SF_FTYPE_PCSHORT)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vbcstnesh2ps_v8sf, "__builtin_ia32_vbcstnesh2ps256", IX86_BUILTIN_VBCSTNESH2PS256, UNKNOWN, (int) V8SF_FTYPE_PCSHORT)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneebf162ps_v4sf, "__builtin_ia32_vcvtneebf162ps128", IX86_BUILTIN_VCVTNEEBF162PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8BF)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneebf162ps_v8sf, "__builtin_ia32_vcvtneebf162ps256", IX86_BUILTIN_VCVTNEEBF162PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16BF)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneeph2ps_v4sf, "__builtin_ia32_vcvtneeph2ps128", IX86_BUILTIN_VCVTNEEPH2PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8HF)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneeph2ps_v8sf, "__builtin_ia32_vcvtneeph2ps256", IX86_BUILTIN_VCVTNEEPH2PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16HF)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneobf162ps_v4sf, "__builtin_ia32_vcvtneobf162ps128", IX86_BUILTIN_VCVTNEOBF162PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8BF)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneobf162ps_v8sf, "__builtin_ia32_vcvtneobf162ps256", IX86_BUILTIN_VCVTNEOBF162PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16BF)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneoph2ps_v4sf, "__builtin_ia32_vcvtneoph2ps128", IX86_BUILTIN_VCVTNEOPH2PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8HF)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneoph2ps_v8sf, "__builtin_ia32_vcvtneoph2ps256", IX86_BUILTIN_VCVTNEOPH2PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16HF)
> +
>  /* AVX512BW */
>  BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI)
>  BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI)
> @@ -2809,6 +2823,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf, "__builti
>  BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__builtin_ia32_dpbf16ps_v4sf_mask", IX86_BUILTIN_DPHI16PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
>  BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
>
> +/* AVX-NE-CONVERT */
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_avx_vcvtneps2bf16_v4sf, "__builtin_ia32_vcvtneps2bf16128", IX86_BUILTIN_VCVTNEPS2BF16128, UNKNOWN, (int) V8BF_FTYPE_V4SF)
> +BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_avx_vcvtneps2bf16_v8sf, "__builtin_ia32_vcvtneps2bf16256", IX86_BUILTIN_VCVTNEPS2BF16256, UNKNOWN, (int) V8BF_FTYPE_V8SF)
> +
>  /* AVX512FP16.  */
>  BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_ADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
>  BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_ADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> index a9a35c0a18a..48934df664c 100644
> --- a/gcc/config/i386/i386-c.cc
> +++ b/gcc/config/i386/i386-c.cc
> @@ -637,6 +637,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
>      def_or_undef (parse_in, "__AVXIFMA__");
>    if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8)
>      def_or_undef (parse_in, "__AVXVNNIINT8__");
> +  if (isa_flag2 & OPTION_MASK_ISA2_AVXNECONVERT)
> +    def_or_undef (parse_in, "__AVXNECONVERT__");
>    if (TARGET_IAMCU)
>      {
>        def_or_undef (parse_in, "__iamcu");
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index a0f8a98986e..1e29fe584af 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -10427,7 +10427,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
>      case V4DI_FTYPE_V4DI:
>      case V16HI_FTYPE_V16SF:
>      case V8HI_FTYPE_V8SF:
> +    case V8BF_FTYPE_V8SF:
>      case V8HI_FTYPE_V4SF:
> +    case V8BF_FTYPE_V4SF:
>        nargs = 1;
>        break;
>      case V4SF_FTYPE_V4SF_VEC_MERGE:
> @@ -11860,6 +11862,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
>      case V8SF_FTYPE_PCV4SF:
>      case V8SF_FTYPE_PCFLOAT:
>      case V4SF_FTYPE_PCFLOAT:
> +    case V4SF_FTYPE_PCSHORT:
> +    case V4SF_FTYPE_PCV8BF:
> +    case V4SF_FTYPE_PCV8HF:
> +    case V8SF_FTYPE_PCSHORT:
> +    case V8SF_FTYPE_PCV16BF:
> +    case V8SF_FTYPE_PCV16HF:
>      case V4DF_FTYPE_PCV2DF:
>      case V4DF_FTYPE_PCDOUBLE:
>      case V2DF_FTYPE_PCDOUBLE:
> diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
> index c95b917c6ce..4ea3f96f69f 100644
> --- a/gcc/config/i386/i386-isa.def
> +++ b/gcc/config/i386/i386-isa.def
> @@ -111,3 +111,4 @@ DEF_PTA(AVXVNNI)
>  DEF_PTA(AVX512FP16)
>  DEF_PTA(AVXIFMA)
>  DEF_PTA(AVXVNNIINT8)
> +DEF_PTA(AVXNECONVERT)
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index 3e6d04433a6..e59e2d8aeaf 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -228,7 +228,8 @@ static struct ix86_target_opts isa2_opts[] =
>    { "-mavxvnni",       OPTION_MASK_ISA2_AVXVNNI },
>    { "-mavx512fp16",    OPTION_MASK_ISA2_AVX512FP16 },
>    { "-mavxifma",       OPTION_MASK_ISA2_AVXIFMA },
> -  { "-mavxvnniint8",   OPTION_MASK_ISA2_AVXVNNIINT8 }
> +  { "-mavxvnniint8",   OPTION_MASK_ISA2_AVXVNNIINT8 },
> +  { "-mavxneconvert",   OPTION_MASK_ISA2_AVXNECONVERT }
>  };
>  static struct ix86_target_opts isa_opts[] =
>  {
> @@ -1076,6 +1077,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
>      IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
>      IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
>      IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
> +    IX86_ATTR_ISA ("avxneconvert",   OPT_mavxneconvert),
>
>      /* enum options */
>      IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 53d534f6392..6e07b89ac4c 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1224,3 +1224,8 @@ mavxvnniint8
>  Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save
>  Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and
>  AVXVNNIINT8 built-in functions and code generation.
> +
> +mavxneconvert
> +Target Mask(ISA2_AVXNECONVERT) Var(ix86_isa_flags2) Save
> +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
> +AVXNECONVERT build-in functions and code generation.
> diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> index c62d50f1951..d7433f639c8 100644
> --- a/gcc/config/i386/immintrin.h
> +++ b/gcc/config/i386/immintrin.h
> @@ -124,6 +124,10 @@
>  #include <avx512bf16intrin.h>
>  #endif
>
> +#ifdef __AVX2__
> +#include <avxneconvertintrin.h>
> +#endif
> +
>  #include <amxtileintrin.h>
>
>  #include <amxint8intrin.h>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 49490a213ea..bef4447de62 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -171,6 +171,14 @@
>    UNSPEC_VPMADDWDACCD
>    UNSPEC_VPMADDWDACCSSD
>
> +  ;; For AVXNECONVERT support
> +  UNSPEC_VCVTNEBF16SF
> +  UNSPEC_VCVTNESHSF
> +  UNSPEC_VCVTNEEBF16SF
> +  UNSPEC_VCVTNEEPHSF
> +  UNSPEC_VCVTNEOBF16SF
> +  UNSPEC_VCVTNEOPHSF
> +
>    ;; For VAES support
>    UNSPEC_VAESDEC
>    UNSPEC_VAESDECLAST
> @@ -28930,9 +28938,69 @@
>  ;; Converting from SF to BF
>  (define_mode_attr sf_cvt_bf16
>    [(V4SF  "V8HI") (V8SF  "V8HI") (V16SF  "V16HI")])
> +(define_mode_attr sf_cvt_bfloat16
> +  [(V4SF  "V8BF") (V8SF  "V8BF")])
>  ;; Mapping from BF to SF
>  (define_mode_attr sf_bf16
>    [(V4SF  "V8HI") (V8SF  "V16HI") (V16SF  "V32HI")])
> +(define_mode_attr sf_bfloat16
> +  [(V4SF  "V8BF") (V8SF  "V16BF") (V16SF  "V32BF")])
> +;; Mapping from PH to SF
> +(define_mode_attr ph_cvt_sf
> +  [(V4SF  "V8HF") (V8SF  "V16HF")])
> +
> +(define_int_iterator VBCSTNE
> +  [UNSPEC_VCVTNEBF16SF
> +   UNSPEC_VCVTNESHSF])
> +
> +(define_int_attr vbcstnetype
> +  [(UNSPEC_VCVTNEBF16SF "bf16") (UNSPEC_VCVTNESHSF "sh")])
> +
> +(define_insn "vbcstne<vbcstnetype>2ps_<mode>"
> +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> +    (vec_duplicate:VF1_128_256
> +     (unspec:SF
> +      [(match_operand:HI 1 "memory_operand" "m")]
> +      VBCSTNE)))]
> +  "TARGET_AVXNECONVERT"
> +  "vbcstne<vbcstnetype>2ps\t{%1, %0|%0, %1}"
> +  [(set_attr "prefix" "vex")
> +  (set_attr "mode" "<sseinsnmode>")])
Since jakub has support bf16 software emulation, can we rewrite it
with general rtl ir without unspec?
Like (float_extend:SF (match_operand:BF "memory_operand" "m")
> +
> +(define_int_iterator VCVTNEBF16
> +  [UNSPEC_VCVTNEEBF16SF
> +   UNSPEC_VCVTNEOBF16SF])
> +
> +(define_int_attr vcvtnebf16type
> +  [(UNSPEC_VCVTNEEBF16SF "ebf16")
> +   (UNSPEC_VCVTNEOBF16SF "obf16")])
> +(define_insn "vcvtne<vcvtnebf16type>2ps_<mode>"
> +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> +    (unspec:VF1_128_256
> +      [(match_operand:<sf_bfloat16> 1 "memory_operand" "m")]
> +     VCVTNEBF16))]
> +  "TARGET_AVXNECONVERT"
> +  "vcvtne<vcvtnebf16type>2ps\t{%1, %0|%0, %1}"
> +  [(set_attr "prefix" "vex")
> +   (set_attr "mode" "<sseinsnmode>")])
Similar for this one and all those patterns below.
> +
> +(define_int_iterator VCVTNEPH
> +  [UNSPEC_VCVTNEEPHSF
> +   UNSPEC_VCVTNEOPHSF])
> +
> +(define_int_attr vcvtnephtype
> +  [(UNSPEC_VCVTNEEPHSF "eph")
> +   (UNSPEC_VCVTNEOPHSF "oph")])
> +
> +(define_insn "vcvtne<vcvtnephtype>2ps_<mode>"
> +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> +    (unspec:VF1_128_256
> +     [(match_operand:<ph_cvt_sf> 1 "memory_operand" "m")]
> +   VCVTNEPH))]
> +  "TARGET_AVXNECONVERT"
> +  "vcvtne<vcvtnephtype>2ps\t{%1, %0|%0, %1}"
> +  [(set_attr "prefix" "vex")
> +   (set_attr "mode" "<sseinsnmode>")])
>
>  (define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
>    [(match_operand:BF16 0 "register_operand")
> @@ -28966,13 +29034,41 @@
>    DONE;
>  })
>
> -(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
> +(define_insn "avx_vcvtneps2bf16_<mode>"
> +  [(set (match_operand:<sf_cvt_bfloat16> 0 "register_operand" "=v")
> +    (unspec:<sf_cvt_bfloat16>
> +     [(match_operand:VF1_128_256 1 "register_operand" "v")]
> +     UNSPEC_VCVTNEPS2BF16))]
> +  "TARGET_AVXNECONVERT"
> +  "%{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}"
> +  [(set_attr "prefix" "vex")])
> +
> +(define_insn "avx512f_cvtneps2bf16_<mode>"
>    [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
>         (unspec:<sf_cvt_bf16>
>           [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
>          UNSPEC_VCVTNEPS2BF16))]
>    "TARGET_AVX512BF16"
> -  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
> +  {
> +    if (<MODE_SIZE> <=32
> +       && TARGET_AVXNECONVERT
> +       && !EXT_REX_SSE_REG_P (operands[0])
> +       && !EXT_REX_SSE_REG_P (operands[1]))
> +      return "%{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}";
> +    else
> +      return "vcvtneps2bf16\t{%1, %0|%0, %1}";
> +  })
> +
> +(define_insn "avx512f_cvtneps2bf16_<mode>_mask"
> +  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
> +    (vec_merge:<sf_cvt_bf16>
> +      (unspec:<sf_cvt_bf16>
> +         [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
> +           UNSPEC_VCVTNEPS2BF16)
> +      (match_operand:<sf_cvt_bf16> 2 "nonimm_or_0_operand" "0C")
> +      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
> +  "TARGET_AVX512BF16"
> +  "vcvtneps2bf16\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}")
>
>  (define_expand "avx512f_dpbf16ps_<mode>_maskz"
>    [(match_operand:VF1_AVX512VL 0 "register_operand")
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 9a8de9fc226..0a4396f92bb 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -7070,6 +7070,11 @@ Enable/disable the generation of the AVXIFMA instructions.
>  @cindex @code{target("avxvnniint8")} function attribute, x86
>  Enable/disable the generation of the AVXVNNIINT8 instructions.
>
> +@item avxneconvert
> +@itemx no-avxneconvert
> +@cindex @code{target("avxneconvert")} function attribute, x86
> +Enable/disable the generation of the AVXNECONVERT instructions.
> +
>  @item cld
>  @itemx no-cld
>  @cindex @code{target("cld")} function attribute, x86
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index d4ff7549bf3..307fb7fa441 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options.
>  -mavx5124fmaps  -mavx512vnni  -mavx5124vnniw  -mprfchw  -mrdpid @gol
>  -mrdseed  -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
>  -mamx-tile  -mamx-int8  -mamx-bf16 -muintr -mhreset -mavxvnni@gol
> --mavx512fp16 -mavxifma -mavxvnniint8 @gol
> +-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert @gol
>  -mcldemote  -mms-bitfields  -mno-align-stringops  -minline-all-stringops @gol
>  -minline-stringops-dynamically  -mstringop-strategy=@var{alg} @gol
>  -mkl -mwidekl @gol
> @@ -32899,6 +32899,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
>  @need 200
>  @itemx -mavxvnniint8
>  @opindex mavxvnniint8
> +@need 200
> +@itemx -mavxneconvert
> +@opindex mavxneconvert
>  These switches enable the use of instructions in the MMX, SSE,
>  SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
>  AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
> @@ -32909,8 +32912,8 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
>  GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
>  ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
>  UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
> -AVXIFMA, AVXVNNIINT8 or CLDEMOTE extended instruction sets. Each has a
> -corresponding @option{-mno-} option to disable use of these instructions.
> +AVXIFMA, AVXVNNIINT8, AVXNECONVERT or CLDEMOTE extended instruction sets. Each
> +has a corresponding @option{-mno-} option to disable use of these instructions.
>
>  These extensions are also available as built-in functions: see
>  @ref{x86 Built-in Functions}, for details of the functions enabled and
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index e21a1d381e0..a12175b6498 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -2493,6 +2493,9 @@ Target supports the execution of @code{avx512vp2intersect} instructions.
>  @item avxifma
>  Target supports the execution of @code{avxifma} instructions.
>
> +@item avxneconvert
> +Target supports the execution of @code{avxneconvert} instructions.
> +
>  @item avxvnniint8
>  Target supports the execution of @code{avxvnniint8} instructions.
>
> diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
> index ebd01fe47bc..dd3e71f25ed 100644
> --- a/gcc/testsuite/g++.dg/other/i386-2.C
> +++ b/gcc/testsuite/g++.dg/other/i386-2.C
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt  -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
> +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt  -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
>
>  /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
>     xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
> index b66498f1d4c..cd7045cc4e4 100644
> --- a/gcc/testsuite/g++.dg/other/i386-3.C
> +++ b/gcc/testsuite/g++.dg/other/i386-3.C
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
> +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
>
>  /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
>     xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
> diff --git a/gcc/testsuite/gcc.target/i386/avx-check.h b/gcc/testsuite/gcc.target/i386/avx-check.h
> index 77507ca2edc..666eff50780 100644
> --- a/gcc/testsuite/gcc.target/i386/avx-check.h
> +++ b/gcc/testsuite/gcc.target/i386/avx-check.h
> @@ -28,6 +28,9 @@ main ()
>  #endif
>  #ifdef AVXVNNIINT8
>        && __builtin_cpu_supports ("avxvnniint8")
> +#endif
> +#ifdef AVXNECONVERT
> +      && __builtin_cpu_supports ("avxneconvert")
>  #endif
>        )
>      {
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-1.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-1.c
> new file mode 100644
> index 00000000000..b1848037e81
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-1.c
> @@ -0,0 +1,45 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavxneconvert -O2" } */
> +/* { dg-final { scan-assembler-times "vbcstnebf162ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vbcstnebf162ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vbcstnesh2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vbcstnesh2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneebf162ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneebf162ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneeph2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneeph2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneobf162ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneobf162ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneoph2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneoph2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "\{vex\} vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "\{vex\} vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +#include <immintrin.h>
> +
> +volatile __m128 x1;
> +volatile __m256 x2;
> +volatile __m128bf16 res1, res2;
> +const void *a;
> +__m128bf16 *b;
> +__m256bf16 *c;
> +__m128h *d;
> +__m256h *e;
> +
> +void extern
> +avx_ne_convert_test (void)
> +{
> +  x1 = _mm_bcstnebf16_ps (a);
> +  x2 = _mm256_bcstnebf16_ps (a);
> +  x1 = _mm_bcstnesh_ps (a);
> +  x2 = _mm256_bcstnesh_ps (a);
> +  x1 = _mm_cvtneebf16_ps (b);
> +  x2 = _mm256_cvtneebf16_ps (c);
> +  x1 = _mm_cvtneeph_ps (d);
> +  x2 = _mm256_cvtneeph_ps (e);
> +  x1 = _mm_cvtneobf16_ps (b);
> +  x2 = _mm256_cvtneobf16_ps (c);
> +  x1 = _mm_cvtneoph_ps (d);
> +  x2 = _mm256_cvtneoph_ps (e);
> +  res1 = _mm_cvtneps_avx_pbh (x1);
> +  res2 = _mm256_cvtneps_avx_pbh (x2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c
> new file mode 100644
> index 00000000000..2707c58f7cd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c
> @@ -0,0 +1,54 @@
> +/* { dg-do run } */
> +/* { dg-options "-mavxneconvert -O2" } */
> +/* { dg-require-effective-target avxneconvert } */
> +#define AVXNECONVERT
> +#include <stdint.h>
> +
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +typedef union
> +{
> +  uint32_t int32;
> +  float flt;
> +} float_int_t;
> +
> +static uint16_t convert_fp32_to_bf16 (float fp)
> +{
> +  float_int_t fi;
> +  fi.flt = fp;
> +  return ((fi.int32 >> 16) & 0xffff);
> +}
> +
> +void TEST (void)
> +{
> +  union128 dst_128;
> +  union256 dst_256;
> +  float res_ref_128[4], res_ref_256[8], fp32;
> +  uint16_t var;
> +  fp32 = (float) 3 * 2 + 5.5;
> +  for (int i = 0; i < 4; i++)
> +  {
> +    res_ref_128[i] = fp32;
> +    dst_128.a[i] = 117;
> +  }
> +  for (int i = 0; i < 8; i++)
> +  {
> +    res_ref_256[i] = fp32;
> +    dst_256.a[i] = 117;
> +  }
> +  var = convert_fp32_to_bf16 (fp32);
> +  dst_128.x = _mm_bcstnebf16_ps (&var);
> +  dst_256.x = _mm256_bcstnebf16_ps (&var);
> +  if (check_union128 (dst_128, res_ref_128))
> +    abort();
> +  if (check_union256 (dst_256, res_ref_256))
> +    abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c
> new file mode 100644
> index 00000000000..0e6f38334b8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-options "-mavxneconvert -mf16c -O2" } */
> +/* { dg-require-effective-target avxneconvert } */
> +#define AVXNECONVERT
> +#include <stdint.h>
> +#include <immintrin.h>
> +
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +void TEST (void)
> +{
> +  union128 dst_128;
> +  union256 dst_256;
> +  float res_ref_128[4], res_ref_256[8], fp32;
> +  uint16_t var;
> +  fp32 = (float) 3 * 2 + 8.5;
> +  for (int i = 0; i < 4; i++)
> +  {
> +    res_ref_128[i] = fp32;
> +    dst_128.a[i] = 117;
> +  }
> +  for (int i = 0; i < 8; i++)
> +  {
> +    res_ref_256[i] = fp32;
> +    dst_256.a[i] = 117;
> +  }
> +  var = _cvtss_sh (fp32, 0);
> +  dst_128.x = _mm_bcstnesh_ps (&var);
> +  dst_256.x = _mm256_bcstnesh_ps (&var);
> +  if (check_union128 (dst_128, res_ref_128))
> +    abort();
> +  if (check_union256 (dst_256, res_ref_256))
> +    abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c
> new file mode 100644
> index 00000000000..c80f3fdedec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c
> @@ -0,0 +1,73 @@
> +/* { dg-do run } */
> +/* { dg-options "-mavxneconvert -O2" } */
> +/* { dg-require-effective-target avxneconvert } */
> +#define AVXNECONVERT
> +#include <stdint.h>
> +
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +typedef union
> +{
> +  uint32_t int32;
> +  float flt;
> +} float_int_t;
> +
> +typedef union
> +{
> +  __m128bf16 x;
> +  uint32_t a[4];
> +} union128bf16_i;
> +
> +typedef union
> +{
> +  __m256bf16 x;
> +  uint32_t a[8];
> +} union256bf16_i;
> +
> +static uint16_t convert_fp32_to_bf16 (float fp)
> +{
> +  float_int_t fi;
> +  fi.flt = fp;
> +  return ((fi.int32 >> 16) & 0xffff);
> +}
> +
> +void TEST (void)
> +{
> +  union128 dst_128;
> +  union256 dst_256;
> +  float res_ref_128[4], res_ref_256[8], fp32;
> +  uint16_t bf16;
> +  union128bf16_i src_128bh;
> +  union256bf16_i src_256bh;
> +
> +  for (int i = 0; i < 4; i++)
> +  {
> +    fp32 = (float) 3 * i + 5 + i * 0.5;
> +    bf16 = convert_fp32_to_bf16 (fp32);
> +    src_128bh.a[i] = bf16; // store bf16 at the lower part of the dword
> +    res_ref_128[i] = fp32;
> +    dst_128.a[i] = 117;
> +  }
> +  for (int i = 0; i < 8; i++)
> +  {
> +    fp32 = (float) 3 * i + 5 + i * 0.5;
> +    bf16 = convert_fp32_to_bf16 (fp32);
> +    src_256bh.a[i] = bf16; // store bf16 at the lower part of the dword
> +    res_ref_256[i] = fp32;
> +    dst_256.a[i] = 117;
> +  }
> +  dst_128.x = _mm_cvtneebf16_ps (&src_128bh.x);
> +  dst_256.x = _mm256_cvtneebf16_ps (&src_256bh.x);
> +  if (check_union128 (dst_128, res_ref_128))
> +    abort();
> +  if (check_union256 (dst_256, res_ref_256))
> +    abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c
> new file mode 100644
> index 00000000000..a862894746d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c
> @@ -0,0 +1,66 @@
> +/* { dg-do run } */
> +/* { dg-options "-mavxneconvert -mf16c -O2" } */
> +/* { dg-require-effective-target avxneconvert } */
> +#define AVXNECONVERT
> +#include <stdint.h>
> +
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +typedef union
> +{
> +  uint32_t int32;
> +  float flt;
> +} float_int_t;
> +
> +typedef union
> +{
> +  __m128h x;
> +  uint32_t a[4];
> +} union128h;
> +
> +typedef union
> +{
> +  __m256h x;
> +  uint32_t a[8];
> +} union256h;
> +
> +void TEST (void)
> +{
> +  union128 dst_128;
> +  union256 dst_256;
> +  float res_ref_128[4], res_ref_256[8], fp32;
> +  uint16_t fp16;
> +  union128h src_128h;
> +  union256h src_256h;
> +
> +  for (int i = 0; i < 4; i++)
> +  {
> +    fp32 = (float) 3 * i + 5 + i * 0.5;
> +    fp16 = _cvtss_sh (fp32, 0);
> +    src_128h.a[i] = fp16;
> +    res_ref_128[i] = fp32;
> +    dst_128.a[i] = 117;
> +  }
> +  for (int i = 0; i < 8; i++)
> +  {
> +    fp32 = (float) 3 * i + 5 + i * 0.5;
> +    fp16 = _cvtss_sh (fp32, 0);
> +    src_256h.a[i] = fp16;
> +    res_ref_256[i] = fp32;
> +    dst_256.a[i] = 117;
> +  }
> +  dst_128.x = _mm_cvtneeph_ps (&src_128h.x);
> +  dst_256.x = _mm256_cvtneeph_ps (&src_256h.x);
> +  if (check_union128 (dst_128, res_ref_128))
> +    abort();
> +  if (check_union256 (dst_256, res_ref_256))
> +    abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c
> new file mode 100644
> index 00000000000..d95aee067ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c
> @@ -0,0 +1,75 @@
> +/* { dg-do run } */
> +/* { dg-options "-mavxneconvert -O2" } */
> +/* { dg-require-effective-target avxneconvert } */
> +#define AVXNECONVERT
> +#include <stdint.h>
> +
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +typedef union
> +{
> +  uint32_t int32;
> +  float flt;
> +} float_int_t;
> +
> +typedef union
> +{
> +  __m128bf16 x;
> +  uint32_t a[4];
> +} union128bf16_i;
> +
> +typedef union
> +{
> +  __m256bf16 x;
> +  uint32_t a[8];
> +} union256bf16_i;
> +
> +static uint16_t convert_fp32_to_bf16 (float fp)
> +{
> +  float_int_t fi;
> +  fi.flt = fp;
> +  return ((fi.int32 >> 16) & 0xffff);
> +}
> +
> +void TEST (void)
> +{
> +  union128 dst_128;
> +  union256 dst_256;
> +  float res_ref_128[4], res_ref_256[8], fp32;
> +  uint16_t bf16;
> +  union128bf16_i src_128bh;
> +  union256bf16_i src_256bh;
> +
> +  for (int i = 0; i < 4; i++)
> +  {
> +    fp32 = (float) 3 * i + 5 + i * 0.5;
> +    bf16 = convert_fp32_to_bf16 (fp32);
> +    // store bf16 at the upper part of the dword
> +    src_128bh.a[i] = (bf16 << 16) & 0xffff0000;
> +    res_ref_128[i] = fp32;
> +    dst_128.a[i] = 117;
> +  }
> +  for (int i = 0; i < 8; i++)
> +  {
> +    fp32 = (float) 3 * i + 5 + i * 0.5;
> +    bf16 = convert_fp32_to_bf16 (fp32);
> +    // store bf16 at the upper part of the dword
> +    src_256bh.a[i] = (bf16 << 16) & 0xffff0000;
> +    res_ref_256[i] = fp32;
> +    dst_256.a[i] = 117;
> +  }
> +  dst_128.x = _mm_cvtneobf16_ps (&src_128bh.x);
> +  dst_256.x = _mm256_cvtneobf16_ps (&src_256bh.x);
> +  if (check_union128 (dst_128, res_ref_128))
> +    abort();
> +  if (check_union256 (dst_256, res_ref_256))
> +    abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c
> new file mode 100644
> index 00000000000..95eb5d74765
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c
> @@ -0,0 +1,66 @@
> +/* { dg-do run } */
> +/* { dg-options "-mavxneconvert -mf16c -O2" } */
> +/* { dg-require-effective-target avxneconvert } */
> +#define AVXNECONVERT
> +#include <stdint.h>
> +
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +typedef union
> +{
> +  uint32_t int32;
> +  float flt;
> +} float_int_t;
> +
> +typedef union
> +{
> +  __m128h x;
> +  uint32_t a[4];
> +} union128h;
> +
> +typedef union
> +{
> +  __m256h x;
> +  uint32_t a[8];
> +} union256h;
> +
> +void TEST (void)
> +{
> +  union128 dst_128;
> +  union256 dst_256;
> +  float res_ref_128[4], res_ref_256[8], fp32;
> +  uint16_t fp16;
> +  union128h src_128h;
> +  union256h src_256h;
> +
> +  for (int i = 0; i < 4; i++)
> +  {
> +    fp32 = (float) 3 * i + 5 + i * 0.5;
> +    fp16 = _cvtss_sh (fp32, 0);
> +    src_128h.a[i] = fp16 << 16;
> +    res_ref_128[i] = fp32;
> +    dst_128.a[i] = 117;
> +  }
> +  for (int i = 0; i < 8; i++)
> +  {
> +    fp32 = (float) 3 * i + 5 + i * 0.5;
> +    fp16 = _cvtss_sh (fp32, 0);
> +    src_256h.a[i] = fp16 << 16;
> +    res_ref_256[i] = fp32;
> +    dst_256.a[i] = 117;
> +  }
> +  dst_128.x = _mm_cvtneoph_ps (&src_128h.x);
> +  dst_256.x = _mm256_cvtneoph_ps (&src_256h.x);
> +  if (check_union128 (dst_128, res_ref_128))
> +    abort();
> +  if (check_union256 (dst_256, res_ref_256))
> +    abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c
> new file mode 100644
> index 00000000000..0861521111a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c
> @@ -0,0 +1,58 @@
> +/* { dg-do run } */
> +/* { dg-options "-mavxneconvert -O2" } */
> +/* { dg-require-effective-target avxneconvert } */
> +#define AVXNECONVERT
> +#include <stdint.h>
> +
> +#ifndef CHECK
> +#define CHECK "avx-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#include CHECK
> +
> +typedef union
> +{
> +  uint32_t int32;
> +  float flt;
> +} float_int_t;
> +
> +typedef union
> +{
> +  __m128bf16  x;
> +  unsigned short a[8];
> +} union128bf16;
> +
> +void TEST (void)
> +{
> +  union128 src_128;
> +  union256 src_256;
> +  union128bf16 dst_128, dst_256;
> +  uint16_t res_ref_128[8] = {0}, res_ref_256[8];
> +  float_int_t fp32;
> +  for (int i = 0; i < 4; i++)
> +  {
> +    fp32.flt = (float) 2 * i + 7 + i * 0.25;
> +    src_128.a[i] = fp32.flt;
> +    res_ref_128[i] = fp32.int32 >> 16;
> +    dst_128.a[i] = 117;
> +  }
> +
> +  for (int i = 0; i < 8; i++)
> +  {
> +    fp32.flt = (float) 2 * i + 7 + i * 0.25;
> +    src_256.a[i] = fp32.flt;
> +    res_ref_256[i] = fp32.int32 >> 16;
> +    dst_256.a[i] = 117;
> +  }
> +  dst_128.x = _mm_cvtneps_avx_pbh (src_128.x);
> +  dst_256.x = _mm256_cvtneps_avx_pbh (src_256.x);
> +
> +  if (checkVus (dst_128.a, res_ref_128, 8))
> +    abort();
> +  if (checkVus (dst_128.a, res_ref_128, 8))
> +    abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1a.c
> similarity index 100%
> rename from gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c
> rename to gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1a.c
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c
> new file mode 100644
> index 00000000000..8b5d6a644bc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512bf16 -mavx512vl -mavxneconvert -O2" } */
> +/* { dg-final { scan-assembler-times "\{vex\} vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "\{vex\} vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m128bh res1, res2;
> +volatile __m128 x1;
> +volatile __m256 x2;
> +volatile __mmask8 m8;
> +
> +void extern
> +avx512bf16_test (void)
> +{
> +  res2 = _mm256_cvtneps_pbh (x2);
> +  res2 = _mm256_mask_cvtneps_pbh (res2, m8, x2);
> +  res2 = _mm256_maskz_cvtneps_pbh (m8, x2);
> +
> +  res1 = _mm_cvtneps_pbh (x1);
> +  res1 = _mm_mask_cvtneps_pbh (res1, m8, x1);
> +  res1 = _mm_maskz_cvtneps_pbh (m8, x1);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> index a681bffe3e7..b3d33df7c9c 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> @@ -82,6 +82,7 @@ extern void test_avxvnni (void)                       __attribute__((__target__("avxvnni")));
>  extern void test_avx512fp16 (void)             __attribute__((__target__("avx512fp16")));
>  extern void test_avxifma (void)                        __attribute__((__target__("avxifma")));
>  extern void test_avxvnniint8 (void)            __attribute__((__target__("avxvnniint8")));
> +extern void test_avxneconvert (void)           __attribute__((__target__("avxneconvert")));
>
>  extern void test_no_sgx (void)                 __attribute__((__target__("no-sgx")));
>  extern void test_no_avx5124fmaps(void)         __attribute__((__target__("no-avx5124fmaps")));
> @@ -165,6 +166,7 @@ extern void test_no_avxvnni (void)          __attribute__((__target__("no-avxvnni")));
>  extern void test_no_avx512fp16 (void)          __attribute__((__target__("no-avx512fp16")));
>  extern void test_no_avxifma (void)             __attribute__((__target__("no-avxifma")));
>  extern void test_no_avxvnniint8 (void)         __attribute__((__target__("no-avxvnniint8")));
> +extern void test_no_avxneconvert (void)                __attribute__((__target__("no-avxneconvert")));
>
>  extern void test_arch_nocona (void)            __attribute__((__target__("arch=nocona")));
>  extern void test_arch_core2 (void)             __attribute__((__target__("arch=core2")));
> diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
> index ddde2df6657..3eabc49a6ab 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-12.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-12.c
> @@ -3,7 +3,7 @@
>     popcntintrin.h gfniintrin.h and mm_malloc.h are usable
>     with -O -std=c89 -pedantic-errors.  */
>  /* { dg-do compile } */
> -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8" } */
> +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert" } */
>
>  #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
> index 2b293216c6f..b9cdfb690d1 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
> +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
>  /* { dg-add-options bind_pic_locally } */
>
>  #include <mm_malloc.h>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
> index 78b51048b90..b6ee3806dcc 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-14.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-14.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
> +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
>  /* { dg-add-options bind_pic_locally } */
>
>  #include <mm_malloc.h>
> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
> index cc1c8cfa4be..71ac0f3da19 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-22.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-22.c
> @@ -103,7 +103,7 @@
>
>
>  #ifndef DIFFERENT_PRAGMAS
> -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert")
>  #endif
>
>  /* Following intrinsics require immediate arguments.  They
> @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
>
>  /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
>  #ifdef DIFFERENT_PRAGMAS
> -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert")
>  #endif
>  #include <immintrin.h>
>  test_1 (_cvtss_sh, unsigned short, float, 1)
> diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
> index 270f4483491..898dde80c8f 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> @@ -843,6 +843,6 @@
>  #define __builtin_ia32_vpclmulqdq_v2di(A, B, C)  __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
>  #define __builtin_ia32_vpclmulqdq_v8di(A, B, C)  __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
>
> -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
> +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert")
>
>  #include <x86intrin.h>
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 64ccfc746bd..9228e810c45 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -9530,6 +9530,18 @@ proc check_effective_target_avxvnniint8 { } {
>      } "-O0 -mavxvnniint8" ]
>  }
>
> +# Return 1 if avxneconvert instructions can be compiled.
> +proc check_effective_target_avxneconvert { } {
> +    return [check_no_compiler_messages avxneconvert object {
> +       typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
> +       __m128
> +       _mm_bcstnebf16_ps (const void *__P)
> +       {
> +          return (__m128) __builtin_ia32_vbcstnebf162ps128 ((const short *) __P);
> +       }
> +    } "-O0 -mavxneconvert" ]
> +}
> +
>  # Return 1 if sse instructions can be compiled.
>  proc check_effective_target_sse { } {
>      return [check_no_compiler_messages sse object {
> --
> 2.18.1
>
  
Li, Pan2 via Gcc-patches Oct. 24, 2022, 6:20 a.m. UTC | #2
> From: Gcc-patches <gcc-patches-bounces+lingling.kong=intel.com@gcc.gnu.org>
> On Behalf Of Hongtao Liu via Gcc-patches
> Sent: Monday, October 17, 2022 1:47 PM
> To: Jiang, Haochen <haochen.jiang@intel.com>
> Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH 4/6] Support Intel AVX-NE-CONVERT
>
> On Fri, Oct 14, 2022 at 3:58 PM Haochen Jiang via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > From: Kong Lingling <lingling.kong@intel.com>
> > +(define_insn "vbcstne<vbcstnetype>2ps_<mode>"
> > +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> > +    (vec_duplicate:VF1_128_256
> > +     (unspec:SF
> > +      [(match_operand:HI 1 "memory_operand" "m")]
> > +      VBCSTNE)))]
> > +  "TARGET_AVXNECONVERT"
> > +  "vbcstne<vbcstnetype>2ps\t{%1, %0|%0, %1}"
> > +  [(set_attr "prefix" "vex")
> > +  (set_attr "mode" "<sseinsnmode>")])
> Since jakub has support bf16 software emulation, can we rewrite it
> with general rtl ir without unspec?
> Like (float_extend:SF (match_operand:BF "memory_operand" "m")
> > +
> > +(define_int_iterator VCVTNEBF16
> > +  [UNSPEC_VCVTNEEBF16SF
> > +   UNSPEC_VCVTNEOBF16SF])
> > +
> > +(define_int_attr vcvtnebf16type
> > +  [(UNSPEC_VCVTNEEBF16SF "ebf16")
> > +   (UNSPEC_VCVTNEOBF16SF "obf16")])
> > +(define_insn "vcvtne<vcvtnebf16type>2ps_<mode>"
> > +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> > +    (unspec:VF1_128_256
> > +      [(match_operand:<sf_bfloat16> 1 "memory_operand" "m")]
> > +     VCVTNEBF16))]
> > +  "TARGET_AVXNECONVERT"
> > +  "vcvtne<vcvtnebf16type>2ps\t{%1, %0|%0, %1}"
> > +  [(set_attr "prefix" "vex")
> > +   (set_attr "mode" "<sseinsnmode>")])
> Similar for this one and all those patterns below.

That's great! Thanks for the review! 
Now rewrite it without unspec and use float_extend for new define_insn.

Thanks
Lingling
  
Hongtao Liu Oct. 25, 2022, 5:23 a.m. UTC | #3
On Mon, Oct 24, 2022 at 2:20 PM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> > From: Gcc-patches <gcc-patches-bounces+lingling.kong=intel.com@gcc.gnu.org>
> > On Behalf Of Hongtao Liu via Gcc-patches
> > Sent: Monday, October 17, 2022 1:47 PM
> > To: Jiang, Haochen <haochen.jiang@intel.com>
> > Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH 4/6] Support Intel AVX-NE-CONVERT
> >
> > On Fri, Oct 14, 2022 at 3:58 PM Haochen Jiang via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > From: Kong Lingling <lingling.kong@intel.com>
> > > +(define_insn "vbcstne<vbcstnetype>2ps_<mode>"
> > > +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> > > +    (vec_duplicate:VF1_128_256
> > > +     (unspec:SF
> > > +      [(match_operand:HI 1 "memory_operand" "m")]
> > > +      VBCSTNE)))]
> > > +  "TARGET_AVXNECONVERT"
> > > +  "vbcstne<vbcstnetype>2ps\t{%1, %0|%0, %1}"
> > > +  [(set_attr "prefix" "vex")
> > > +  (set_attr "mode" "<sseinsnmode>")])
> > Since jakub has support bf16 software emulation, can we rewrite it
> > with general rtl ir without unspec?
> > Like (float_extend:SF (match_operand:BF "memory_operand" "m")
> > > +
> > > +(define_int_iterator VCVTNEBF16
> > > +  [UNSPEC_VCVTNEEBF16SF
> > > +   UNSPEC_VCVTNEOBF16SF])
> > > +
> > > +(define_int_attr vcvtnebf16type
> > > +  [(UNSPEC_VCVTNEEBF16SF "ebf16")
> > > +   (UNSPEC_VCVTNEOBF16SF "obf16")])
> > > +(define_insn "vcvtne<vcvtnebf16type>2ps_<mode>"
> > > +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> > > +    (unspec:VF1_128_256
> > > +      [(match_operand:<sf_bfloat16> 1 "memory_operand" "m")]
> > > +     VCVTNEBF16))]
> > > +  "TARGET_AVXNECONVERT"
> > > +  "vcvtne<vcvtnebf16type>2ps\t{%1, %0|%0, %1}"
> > > +  [(set_attr "prefix" "vex")
> > > +   (set_attr "mode" "<sseinsnmode>")])
> > Similar for this one and all those patterns below.
>
> That's great! Thanks for the review!
> Now rewrite it without unspec and use float_extend for new define_insn.
Ok.
>
> Thanks
> Lingling
>
>
  
Li, Pan2 via Gcc-patches Oct. 28, 2022, 8:57 a.m. UTC | #4
Hi,

Because we  switch intrinsics for avx512bf16 to the new type __bf16. Now we could use m128/256bh for vector bf16 type instead of m128/256bf16.
And unified builtin for avx512bf16/avxneconvert.

Thanks,
Lingling

> -----Original Message-----
> From: Hongtao Liu <crazylht@gmail.com>
> Sent: Tuesday, October 25, 2022 1:23 PM
> To: Kong, Lingling <lingling.kong@intel.com>
> Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org; Jiang,
> Haochen <haochen.jiang@intel.com>
> Subject: Re: [PATCH 4/6] Support Intel AVX-NE-CONVERT
> 
> On Mon, Oct 24, 2022 at 2:20 PM Kong, Lingling <lingling.kong@intel.com>
> wrote:
> >
> > > From: Gcc-patches
> > > <gcc-patches-bounces+lingling.kong=intel.com@gcc.gnu.org>
> > > On Behalf Of Hongtao Liu via Gcc-patches
> > > Sent: Monday, October 17, 2022 1:47 PM
> > > To: Jiang, Haochen <haochen.jiang@intel.com>
> > > Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
> > > Subject: Re: [PATCH 4/6] Support Intel AVX-NE-CONVERT
> > >
> > > On Fri, Oct 14, 2022 at 3:58 PM Haochen Jiang via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > From: Kong Lingling <lingling.kong@intel.com>
> > > > +(define_insn "vbcstne<vbcstnetype>2ps_<mode>"
> > > > +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> > > > +    (vec_duplicate:VF1_128_256
> > > > +     (unspec:SF
> > > > +      [(match_operand:HI 1 "memory_operand" "m")]
> > > > +      VBCSTNE)))]
> > > > +  "TARGET_AVXNECONVERT"
> > > > +  "vbcstne<vbcstnetype>2ps\t{%1, %0|%0, %1}"
> > > > +  [(set_attr "prefix" "vex")
> > > > +  (set_attr "mode" "<sseinsnmode>")])
> > > Since jakub has support bf16 software emulation, can we rewrite it
> > > with general rtl ir without unspec?
> > > Like (float_extend:SF (match_operand:BF "memory_operand" "m")
> > > > +
> > > > +(define_int_iterator VCVTNEBF16
> > > > +  [UNSPEC_VCVTNEEBF16SF
> > > > +   UNSPEC_VCVTNEOBF16SF])
> > > > +
> > > > +(define_int_attr vcvtnebf16type
> > > > +  [(UNSPEC_VCVTNEEBF16SF "ebf16")
> > > > +   (UNSPEC_VCVTNEOBF16SF "obf16")]) (define_insn
> > > > +"vcvtne<vcvtnebf16type>2ps_<mode>"
> > > > +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> > > > +    (unspec:VF1_128_256
> > > > +      [(match_operand:<sf_bfloat16> 1 "memory_operand" "m")]
> > > > +     VCVTNEBF16))]
> > > > +  "TARGET_AVXNECONVERT"
> > > > +  "vcvtne<vcvtnebf16type>2ps\t{%1, %0|%0, %1}"
> > > > +  [(set_attr "prefix" "vex")
> > > > +   (set_attr "mode" "<sseinsnmode>")])
> > > Similar for this one and all those patterns below.
> >
> > That's great! Thanks for the review!
> > Now rewrite it without unspec and use float_extend for new define_insn.
> Ok.
> >
> > Thanks
> > Lingling
> >
> >
> 
> 
> --
> BR,
> Hongtao
  
Li, Pan2 via Gcc-patches Oct. 31, 2022, 1:05 a.m. UTC | #5
> -----Original Message-----
> From: Kong, Lingling <lingling.kong@intel.com>
> Sent: Friday, October 28, 2022 4:57 PM
> To: Hongtao Liu <crazylht@gmail.com>
> Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org; Jiang,
> Haochen <haochen.jiang@intel.com>
> Subject: RE: [PATCH 4/6] Support Intel AVX-NE-CONVERT
> 
> Hi,
> 
> Because we  switch intrinsics for avx512bf16 to the new type __bf16. Now we
> could use m128/256bh for vector bf16 type instead of m128/256bf16.
> And unified builtin for avx512bf16/avxneconvert.
Ok.
> 
> Thanks,
> Lingling
> 
> > -----Original Message-----
> > From: Hongtao Liu <crazylht@gmail.com>
> > Sent: Tuesday, October 25, 2022 1:23 PM
> > To: Kong, Lingling <lingling.kong@intel.com>
> > Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org;
> > Jiang, Haochen <haochen.jiang@intel.com>
> > Subject: Re: [PATCH 4/6] Support Intel AVX-NE-CONVERT
> >
> > On Mon, Oct 24, 2022 at 2:20 PM Kong, Lingling
> > <lingling.kong@intel.com>
> > wrote:
> > >
> > > > From: Gcc-patches
> > > > <gcc-patches-bounces+lingling.kong=intel.com@gcc.gnu.org>
> > > > On Behalf Of Hongtao Liu via Gcc-patches
> > > > Sent: Monday, October 17, 2022 1:47 PM
> > > > To: Jiang, Haochen <haochen.jiang@intel.com>
> > > > Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
> > > > Subject: Re: [PATCH 4/6] Support Intel AVX-NE-CONVERT
> > > >
> > > > On Fri, Oct 14, 2022 at 3:58 PM Haochen Jiang via Gcc-patches
> > > > <gcc-patches@gcc.gnu.org> wrote:
> > > > >
> > > > > From: Kong Lingling <lingling.kong@intel.com>
> > > > > +(define_insn "vbcstne<vbcstnetype>2ps_<mode>"
> > > > > +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> > > > > +    (vec_duplicate:VF1_128_256
> > > > > +     (unspec:SF
> > > > > +      [(match_operand:HI 1 "memory_operand" "m")]
> > > > > +      VBCSTNE)))]
> > > > > +  "TARGET_AVXNECONVERT"
> > > > > +  "vbcstne<vbcstnetype>2ps\t{%1, %0|%0, %1}"
> > > > > +  [(set_attr "prefix" "vex")
> > > > > +  (set_attr "mode" "<sseinsnmode>")])
> > > > Since jakub has support bf16 software emulation, can we rewrite it
> > > > with general rtl ir without unspec?
> > > > Like (float_extend:SF (match_operand:BF "memory_operand" "m")
> > > > > +
> > > > > +(define_int_iterator VCVTNEBF16
> > > > > +  [UNSPEC_VCVTNEEBF16SF
> > > > > +   UNSPEC_VCVTNEOBF16SF])
> > > > > +
> > > > > +(define_int_attr vcvtnebf16type
> > > > > +  [(UNSPEC_VCVTNEEBF16SF "ebf16")
> > > > > +   (UNSPEC_VCVTNEOBF16SF "obf16")]) (define_insn
> > > > > +"vcvtne<vcvtnebf16type>2ps_<mode>"
> > > > > +  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
> > > > > +    (unspec:VF1_128_256
> > > > > +      [(match_operand:<sf_bfloat16> 1 "memory_operand" "m")]
> > > > > +     VCVTNEBF16))]
> > > > > +  "TARGET_AVXNECONVERT"
> > > > > +  "vcvtne<vcvtnebf16type>2ps\t{%1, %0|%0, %1}"
> > > > > +  [(set_attr "prefix" "vex")
> > > > > +   (set_attr "mode" "<sseinsnmode>")])
> > > > Similar for this one and all those patterns below.
> > >
> > > That's great! Thanks for the review!
> > > Now rewrite it without unspec and use float_extend for new define_insn.
> > Ok.
> > >
> > > Thanks
> > > Lingling
> > >
> > >
> >
> >
> > --
> > BR,
> > Hongtao
  

Patch

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index bed88003f8e..e9fd586704d 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -797,6 +797,8 @@  get_available_features (struct __processor_model *cpu_model,
 	    set_feature (FEATURE_AVXIFMA);
 	  if (edx & bit_AVXVNNIINT8)
 	    set_feature (FEATURE_AVXVNNIINT8);
+	  if (edx & bit_AVXNECONVERT)
+	    set_feature (FEATURE_AVXNECONVERT);
 	}
       if (avx512_usable)
 	{
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index 6a2a7e3d25a..f9c906f75cf 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -109,6 +109,7 @@  along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
 #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
 #define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8
+#define OPTION_MASK_ISA2_AVXNECONVERT_SET OPTION_MASK_ISA2_AVXNECONVERT
 
 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
    as -msse4.2.  */
@@ -215,7 +216,8 @@  along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
 #define OPTION_MASK_ISA2_AVX2_UNSET \
   (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
-   | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
+   | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVXNECONVERT_UNSET \
+   | OPTION_MASK_ISA2_AVX512F_UNSET)
 #define OPTION_MASK_ISA_AVX512F_UNSET \
   (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
    | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
@@ -280,6 +282,7 @@  along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
 #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
 #define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET OPTION_MASK_ISA2_AVXVNNIINT8
+#define OPTION_MASK_ISA2_AVXNECONVERT_UNSET OPTION_MASK_ISA2_AVXNECONVERT
 
 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
    as -mno-sse4.1. */
@@ -1162,6 +1165,22 @@  ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_mavxneconvert:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXNECONVERT_SET;
+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXNECONVERT_SET;
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVXNECONVERT_UNSET;
+	  opts->x_ix86_isa_flags2_explicit
+	    |= OPTION_MASK_ISA2_AVXNECONVERT_UNSET;
+	}
+      return true;
+
     case OPT_mfma:
       if (value)
 	{
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
index 9a6b92fab79..2d3fbfc817a 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -242,6 +242,7 @@  enum processor_features
   FEATURE_X86_64_V4,
   FEATURE_AVXIFMA,
   FEATURE_AVXVNNIINT8,
+  FEATURE_AVXNECONVERT,
   CPU_FEATURE_MAX
 };
 
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
index 8c1f351056c..bceaee589ee 100644
--- a/gcc/common/config/i386/i386-isas.h
+++ b/gcc/common/config/i386/i386-isas.h
@@ -178,4 +178,6 @@  ISA_NAMES_TABLE_START
   ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma")
   ISA_NAMES_TABLE_ENTRY("avxvnniint8", FEATURE_AVXVNNIINT8,
 			P_NONE, "-mavxvnniint8")
+  ISA_NAMES_TABLE_ENTRY("avxneconvert", FEATURE_AVXNECONVERT,
+			P_NONE, "-mavxneconvert")
 ISA_NAMES_TABLE_END
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 4df78238910..840b62aee61 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -422,7 +422,7 @@  i[34567]86-*-* | x86_64-*-*)
 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
 		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
-		       avxifmaintrin.h avxvnniint8intrin.h"
+		       avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h"
 	;;
 ia64-*-*)
 	extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avxneconvertintrin.h b/gcc/config/i386/avxneconvertintrin.h
new file mode 100644
index 00000000000..30199384725
--- /dev/null
+++ b/gcc/config/i386/avxneconvertintrin.h
@@ -0,0 +1,140 @@ 
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avxneconvertintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVXNECONVERTINTRIN_H_INCLUDED
+#define _AVXNECONVERTINTRIN_H_INCLUDED
+
+#ifndef __AVXNECONVERT__
+#pragma GCC push_options
+#pragma GCC target ("avxneconvert")
+#define __DISABLE_AVXNECONVERT__
+#endif /* __AVXNECONVERT__ */
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_bcstnebf16_ps (const void *__P)
+{
+  return (__m128) __builtin_ia32_vbcstnebf162ps128 ((const short *) __P);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_bcstnebf16_ps (const void *__P)
+{
+  return (__m256) __builtin_ia32_vbcstnebf162ps256 ((const short *) __P);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_bcstnesh_ps (const void *__P)
+{
+  return (__m128) __builtin_ia32_vbcstnesh2ps128 ((const short *) __P);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_bcstnesh_ps (const void *__P)
+{
+  return (__m256) __builtin_ia32_vbcstnesh2ps256 ((const short *) __P);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneebf16_ps (const __m128bf16 *__A)
+{
+  return (__m128) __builtin_ia32_vcvtneebf162ps128 ((const __v8bf *) __A);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneebf16_ps (const __m256bf16 *__A)
+{
+  return (__m256) __builtin_ia32_vcvtneebf162ps256 ((const __v16bf *) __A);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneeph_ps (const __m128h *__A)
+{
+  return (__m128) __builtin_ia32_vcvtneeph2ps128 ((const __v8hf *) __A);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneeph_ps (const __m256h *__A)
+{
+  return (__m256) __builtin_ia32_vcvtneeph2ps256 ((const __v16hf *) __A);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneobf16_ps (const __m128bf16 *__A)
+{
+  return (__m128) __builtin_ia32_vcvtneobf162ps128 ((const __v8bf *) __A);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneobf16_ps (const __m256bf16 *__A)
+{
+  return (__m256) __builtin_ia32_vcvtneobf162ps256 ((const __v16bf *) __A);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneoph_ps (const __m128h *__A)
+{
+  return (__m128) __builtin_ia32_vcvtneoph2ps128 ((const __v8hf *) __A);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneoph_ps (const __m256h *__A)
+{
+  return (__m256) __builtin_ia32_vcvtneoph2ps256 ((const __v16hf *) __A);
+}
+
+extern __inline __m128bf16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneps_avx_pbh (__m128 __A)
+{
+  return (__m128bf16) __builtin_ia32_vcvtneps2bf16128 (__A);
+}
+
+extern __inline __m128bf16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneps_avx_pbh (__m256 __A)
+{
+  return (__m128bf16) __builtin_ia32_vcvtneps2bf16256 (__A);
+}
+
+#ifdef __DISABLE_AVXNECONVERT__
+#undef __DISABLE_AVXNECONVERT__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVXNECONVERT__ */
+
+#endif /* _AVXNECONVERTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index f5fad22149a..18bbc0cb7be 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -50,6 +50,7 @@ 
 
 /* %edx */
 #define bit_AVXVNNIINT8 (1 << 4)
+#define bit_AVXNECONVERT (1 << 5)
 #define bit_CMPXCHG8B	(1 << 8)
 #define bit_CMOV	(1 << 15)
 #define bit_MMX		(1 << 23)
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 63a360b0f8b..ebf6e5b4ad8 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -87,6 +87,7 @@  DEF_VECTOR_TYPE (V8QI, QI)
 DEF_VECTOR_TYPE (V2DF, DOUBLE)
 DEF_VECTOR_TYPE (V4SF, FLOAT)
 DEF_VECTOR_TYPE (V8HF, FLOAT16)
+DEF_VECTOR_TYPE (V8BF, BFLOAT16)
 DEF_VECTOR_TYPE (V2DI, DI)
 DEF_VECTOR_TYPE (V4SI, SI)
 DEF_VECTOR_TYPE (V8HI, HI)
@@ -100,6 +101,7 @@  DEF_VECTOR_TYPE (V16UQI, UQI, V16QI)
 DEF_VECTOR_TYPE (V4DF, DOUBLE)
 DEF_VECTOR_TYPE (V8SF, FLOAT)
 DEF_VECTOR_TYPE (V16HF, FLOAT16)
+DEF_VECTOR_TYPE (V16BF, BFLOAT16)
 DEF_VECTOR_TYPE (V4DI, DI)
 DEF_VECTOR_TYPE (V8SI, SI)
 DEF_VECTOR_TYPE (V16HI, HI)
@@ -111,6 +113,7 @@  DEF_VECTOR_TYPE (V16UHI, UHI, V16HI)
 # AVX512F vectors
 DEF_VECTOR_TYPE (V32SF, FLOAT)
 DEF_VECTOR_TYPE (V32HF, FLOAT16)
+DEF_VECTOR_TYPE (V32BF, BFLOAT16)
 DEF_VECTOR_TYPE (V16SF, FLOAT)
 DEF_VECTOR_TYPE (V8DF, DOUBLE)
 DEF_VECTOR_TYPE (V8DI, DI)
@@ -179,6 +182,10 @@  DEF_POINTER_TYPE (PCV4DF, V4DF, CONST)
 DEF_POINTER_TYPE (PCV4SF, V4SF, CONST)
 DEF_POINTER_TYPE (PCV8DF, V8DF, CONST)
 DEF_POINTER_TYPE (PCV8SF, V8SF, CONST)
+DEF_POINTER_TYPE (PCV8HF, V8HF, CONST)
+DEF_POINTER_TYPE (PCV8BF, V8BF, CONST)
+DEF_POINTER_TYPE (PCV16HF, V16HF, CONST)
+DEF_POINTER_TYPE (PCV16BF, V16BF, CONST)
 DEF_POINTER_TYPE (PCV16SF, V16SF, CONST)
 
 DEF_POINTER_TYPE (PCV2DI, V2DI, CONST)
@@ -254,12 +261,14 @@  DEF_FUNCTION_TYPE (V4DF, V4SI)
 DEF_FUNCTION_TYPE (V8DF, V8DF)
 DEF_FUNCTION_TYPE (V4HI, V4HI)
 DEF_FUNCTION_TYPE (V4SF, PCFLOAT)
+DEF_FUNCTION_TYPE (V4SF, PCSHORT)
 DEF_FUNCTION_TYPE (V4SF, V2DF)
 DEF_FUNCTION_TYPE (V4SF, V2DF, V4SF, UQI)
 DEF_FUNCTION_TYPE (V4SF, V4DF)
 DEF_FUNCTION_TYPE (V4SF, V4DF, V4SF, UQI)
 DEF_FUNCTION_TYPE (V4SF, V4SF)
 DEF_FUNCTION_TYPE (V4SF, PCV4SF)
+DEF_FUNCTION_TYPE (V4SF, PCV8HF)
 DEF_FUNCTION_TYPE (V4SF, V4SI)
 DEF_FUNCTION_TYPE (V4SF, V8SF)
 DEF_FUNCTION_TYPE (V4SF, V8HI)
@@ -275,8 +284,10 @@  DEF_FUNCTION_TYPE (V8HI, V16QI)
 DEF_FUNCTION_TYPE (V8HI, V8HI)
 DEF_FUNCTION_TYPE (V8QI, V8QI)
 DEF_FUNCTION_TYPE (V8SF, PCFLOAT)
+DEF_FUNCTION_TYPE (V8SF, PCSHORT)
 DEF_FUNCTION_TYPE (V8SF, PCV4SF)
 DEF_FUNCTION_TYPE (V8SF, PCV8SF)
+DEF_FUNCTION_TYPE (V8SF, PCV16HF)
 DEF_FUNCTION_TYPE (V8SF, V4SF)
 DEF_FUNCTION_TYPE (V8SF, V8SF)
 DEF_FUNCTION_TYPE (V8SF, V8SI)
@@ -1389,3 +1400,9 @@  DEF_FUNCTION_TYPE (V32HF, V32HF)
 DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND)
+
+# AVXNECONVERT builtins
+DEF_FUNCTION_TYPE (V8BF, V8SF)
+DEF_FUNCTION_TYPE (V8BF, V4SF)
+DEF_FUNCTION_TYPE (V4SF, PCV8BF)
+DEF_FUNCTION_TYPE (V8SF, PCV16BF)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index e6edae5728b..a429577180c 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -274,6 +274,20 @@  BDESC (OPTION_MASK_ISA_RTM, 0, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BU
 BDESC (OPTION_MASK_ISA_RTM, 0, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID)
 BDESC (OPTION_MASK_ISA_RTM, 0, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID)
 
+/* AVX-NE-CONVERT */
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vbcstnebf162ps_v4sf, "__builtin_ia32_vbcstnebf162ps128", IX86_BUILTIN_VBCSTNEBF162PS128, UNKNOWN, (int) V4SF_FTYPE_PCSHORT)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vbcstnebf162ps_v8sf, "__builtin_ia32_vbcstnebf162ps256", IX86_BUILTIN_VBCSTNEBF162PS256, UNKNOWN, (int) V8SF_FTYPE_PCSHORT)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vbcstnesh2ps_v4sf, "__builtin_ia32_vbcstnesh2ps128", IX86_BUILTIN_VBCSTNESH2PS128, UNKNOWN, (int) V4SF_FTYPE_PCSHORT)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vbcstnesh2ps_v8sf, "__builtin_ia32_vbcstnesh2ps256", IX86_BUILTIN_VBCSTNESH2PS256, UNKNOWN, (int) V8SF_FTYPE_PCSHORT)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneebf162ps_v4sf, "__builtin_ia32_vcvtneebf162ps128", IX86_BUILTIN_VCVTNEEBF162PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8BF)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneebf162ps_v8sf, "__builtin_ia32_vcvtneebf162ps256", IX86_BUILTIN_VCVTNEEBF162PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16BF)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneeph2ps_v4sf, "__builtin_ia32_vcvtneeph2ps128", IX86_BUILTIN_VCVTNEEPH2PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8HF)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneeph2ps_v8sf, "__builtin_ia32_vcvtneeph2ps256", IX86_BUILTIN_VCVTNEEPH2PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16HF)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneobf162ps_v4sf, "__builtin_ia32_vcvtneobf162ps128", IX86_BUILTIN_VCVTNEOBF162PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8BF)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneobf162ps_v8sf, "__builtin_ia32_vcvtneobf162ps256", IX86_BUILTIN_VCVTNEOBF162PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16BF)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneoph2ps_v4sf, "__builtin_ia32_vcvtneoph2ps128", IX86_BUILTIN_VCVTNEOPH2PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8HF)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneoph2ps_v8sf, "__builtin_ia32_vcvtneoph2ps256", IX86_BUILTIN_VCVTNEOPH2PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16HF)
+
 /* AVX512BW */
 BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI)
 BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI)
@@ -2809,6 +2823,10 @@  BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf, "__builti
 BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__builtin_ia32_dpbf16ps_v4sf_mask", IX86_BUILTIN_DPHI16PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
 BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
 
+/* AVX-NE-CONVERT */
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_avx_vcvtneps2bf16_v4sf, "__builtin_ia32_vcvtneps2bf16128", IX86_BUILTIN_VCVTNEPS2BF16128, UNKNOWN, (int) V8BF_FTYPE_V4SF)
+BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_avx_vcvtneps2bf16_v8sf, "__builtin_ia32_vcvtneps2bf16256", IX86_BUILTIN_VCVTNEPS2BF16256, UNKNOWN, (int) V8BF_FTYPE_V8SF)
+
 /* AVX512FP16.  */
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_ADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_ADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index a9a35c0a18a..48934df664c 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -637,6 +637,8 @@  ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__AVXIFMA__");
   if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8)
     def_or_undef (parse_in, "__AVXVNNIINT8__");
+  if (isa_flag2 & OPTION_MASK_ISA2_AVXNECONVERT)
+    def_or_undef (parse_in, "__AVXNECONVERT__");
   if (TARGET_IAMCU)
     {
       def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a0f8a98986e..1e29fe584af 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -10427,7 +10427,9 @@  ix86_expand_args_builtin (const struct builtin_description *d,
     case V4DI_FTYPE_V4DI:
     case V16HI_FTYPE_V16SF:
     case V8HI_FTYPE_V8SF:
+    case V8BF_FTYPE_V8SF:
     case V8HI_FTYPE_V4SF:
+    case V8BF_FTYPE_V4SF:
       nargs = 1;
       break;
     case V4SF_FTYPE_V4SF_VEC_MERGE:
@@ -11860,6 +11862,12 @@  ix86_expand_special_args_builtin (const struct builtin_description *d,
     case V8SF_FTYPE_PCV4SF:
     case V8SF_FTYPE_PCFLOAT:
     case V4SF_FTYPE_PCFLOAT:
+    case V4SF_FTYPE_PCSHORT:
+    case V4SF_FTYPE_PCV8BF:
+    case V4SF_FTYPE_PCV8HF:
+    case V8SF_FTYPE_PCSHORT:
+    case V8SF_FTYPE_PCV16BF:
+    case V8SF_FTYPE_PCV16HF:
     case V4DF_FTYPE_PCV2DF:
     case V4DF_FTYPE_PCDOUBLE:
     case V2DF_FTYPE_PCDOUBLE:
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
index c95b917c6ce..4ea3f96f69f 100644
--- a/gcc/config/i386/i386-isa.def
+++ b/gcc/config/i386/i386-isa.def
@@ -111,3 +111,4 @@  DEF_PTA(AVXVNNI)
 DEF_PTA(AVX512FP16)
 DEF_PTA(AVXIFMA)
 DEF_PTA(AVXVNNIINT8)
+DEF_PTA(AVXNECONVERT)
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 3e6d04433a6..e59e2d8aeaf 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -228,7 +228,8 @@  static struct ix86_target_opts isa2_opts[] =
   { "-mavxvnni",	OPTION_MASK_ISA2_AVXVNNI },
   { "-mavx512fp16",	OPTION_MASK_ISA2_AVX512FP16 },
   { "-mavxifma",	OPTION_MASK_ISA2_AVXIFMA },
-  { "-mavxvnniint8",	OPTION_MASK_ISA2_AVXVNNIINT8 }
+  { "-mavxvnniint8",	OPTION_MASK_ISA2_AVXVNNIINT8 },
+  { "-mavxneconvert",   OPTION_MASK_ISA2_AVXNECONVERT }
 };
 static struct ix86_target_opts isa_opts[] =
 {
@@ -1076,6 +1077,7 @@  ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
     IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
     IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
     IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
+    IX86_ATTR_ISA ("avxneconvert",   OPT_mavxneconvert),
 
     /* enum options */
     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 53d534f6392..6e07b89ac4c 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1224,3 +1224,8 @@  mavxvnniint8
 Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save
 Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and
 AVXVNNIINT8 built-in functions and code generation.
+
+mavxneconvert
+Target Mask(ISA2_AVXNECONVERT) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
+AVXNECONVERT build-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index c62d50f1951..d7433f639c8 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -124,6 +124,10 @@ 
 #include <avx512bf16intrin.h>
 #endif
 
+#ifdef __AVX2__
+#include <avxneconvertintrin.h>
+#endif
+
 #include <amxtileintrin.h>
 
 #include <amxint8intrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 49490a213ea..bef4447de62 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -171,6 +171,14 @@ 
   UNSPEC_VPMADDWDACCD
   UNSPEC_VPMADDWDACCSSD
 
+  ;; For AVXNECONVERT support
+  UNSPEC_VCVTNEBF16SF
+  UNSPEC_VCVTNESHSF
+  UNSPEC_VCVTNEEBF16SF
+  UNSPEC_VCVTNEEPHSF
+  UNSPEC_VCVTNEOBF16SF
+  UNSPEC_VCVTNEOPHSF
+
   ;; For VAES support
   UNSPEC_VAESDEC
   UNSPEC_VAESDECLAST
@@ -28930,9 +28938,69 @@ 
 ;; Converting from SF to BF
 (define_mode_attr sf_cvt_bf16
   [(V4SF  "V8HI") (V8SF  "V8HI") (V16SF  "V16HI")])
+(define_mode_attr sf_cvt_bfloat16
+  [(V4SF  "V8BF") (V8SF  "V8BF")])
 ;; Mapping from BF to SF
 (define_mode_attr sf_bf16
   [(V4SF  "V8HI") (V8SF  "V16HI") (V16SF  "V32HI")])
+(define_mode_attr sf_bfloat16
+  [(V4SF  "V8BF") (V8SF  "V16BF") (V16SF  "V32BF")])
+;; Mapping from PH to SF
+(define_mode_attr ph_cvt_sf
+  [(V4SF  "V8HF") (V8SF  "V16HF")])
+
+(define_int_iterator VBCSTNE
+  [UNSPEC_VCVTNEBF16SF
+   UNSPEC_VCVTNESHSF])
+
+(define_int_attr vbcstnetype
+  [(UNSPEC_VCVTNEBF16SF "bf16") (UNSPEC_VCVTNESHSF "sh")])
+
+(define_insn "vbcstne<vbcstnetype>2ps_<mode>"
+  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
+    (vec_duplicate:VF1_128_256
+     (unspec:SF
+      [(match_operand:HI 1 "memory_operand" "m")]
+      VBCSTNE)))]
+  "TARGET_AVXNECONVERT"
+  "vbcstne<vbcstnetype>2ps\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "vex")
+  (set_attr "mode" "<sseinsnmode>")])
+
+(define_int_iterator VCVTNEBF16
+  [UNSPEC_VCVTNEEBF16SF
+   UNSPEC_VCVTNEOBF16SF])
+
+(define_int_attr vcvtnebf16type
+  [(UNSPEC_VCVTNEEBF16SF "ebf16")
+   (UNSPEC_VCVTNEOBF16SF "obf16")])
+(define_insn "vcvtne<vcvtnebf16type>2ps_<mode>"
+  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
+    (unspec:VF1_128_256
+      [(match_operand:<sf_bfloat16> 1 "memory_operand" "m")]
+     VCVTNEBF16))]
+  "TARGET_AVXNECONVERT"
+  "vcvtne<vcvtnebf16type>2ps\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_int_iterator VCVTNEPH
+  [UNSPEC_VCVTNEEPHSF
+   UNSPEC_VCVTNEOPHSF])
+
+(define_int_attr vcvtnephtype
+  [(UNSPEC_VCVTNEEPHSF "eph")
+   (UNSPEC_VCVTNEOPHSF "oph")])
+
+(define_insn "vcvtne<vcvtnephtype>2ps_<mode>"
+  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
+    (unspec:VF1_128_256
+     [(match_operand:<ph_cvt_sf> 1 "memory_operand" "m")]
+   VCVTNEPH))]
+  "TARGET_AVXNECONVERT"
+  "vcvtne<vcvtnephtype>2ps\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
   [(match_operand:BF16 0 "register_operand")
@@ -28966,13 +29034,41 @@ 
   DONE;
 })
 
-(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
+(define_insn "avx_vcvtneps2bf16_<mode>"
+  [(set (match_operand:<sf_cvt_bfloat16> 0 "register_operand" "=v")
+    (unspec:<sf_cvt_bfloat16>
+     [(match_operand:VF1_128_256 1 "register_operand" "v")]
+     UNSPEC_VCVTNEPS2BF16))]
+  "TARGET_AVXNECONVERT"
+  "%{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "vex")])
+
+(define_insn "avx512f_cvtneps2bf16_<mode>"
   [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
 	(unspec:<sf_cvt_bf16>
 	  [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
         UNSPEC_VCVTNEPS2BF16))]
   "TARGET_AVX512BF16"
-  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+  {
+    if (<MODE_SIZE> <=32
+	&& TARGET_AVXNECONVERT
+	&& !EXT_REX_SSE_REG_P (operands[0])
+	&& !EXT_REX_SSE_REG_P (operands[1]))
+      return "%{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}";
+    else
+      return "vcvtneps2bf16\t{%1, %0|%0, %1}";
+  })
+
+(define_insn "avx512f_cvtneps2bf16_<mode>_mask"
+  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
+    (vec_merge:<sf_cvt_bf16>
+      (unspec:<sf_cvt_bf16>
+	  [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
+	    UNSPEC_VCVTNEPS2BF16)
+      (match_operand:<sf_cvt_bf16> 2 "nonimm_or_0_operand" "0C")
+      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
+  "TARGET_AVX512BF16"
+  "vcvtneps2bf16\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}")
 
 (define_expand "avx512f_dpbf16ps_<mode>_maskz"
   [(match_operand:VF1_AVX512VL 0 "register_operand")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 9a8de9fc226..0a4396f92bb 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -7070,6 +7070,11 @@  Enable/disable the generation of the AVXIFMA instructions.
 @cindex @code{target("avxvnniint8")} function attribute, x86
 Enable/disable the generation of the AVXVNNIINT8 instructions.
 
+@item avxneconvert
+@itemx no-avxneconvert
+@cindex @code{target("avxneconvert")} function attribute, x86
+Enable/disable the generation of the AVXNECONVERT instructions.
+
 @item cld
 @itemx no-cld
 @cindex @code{target("cld")} function attribute, x86
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d4ff7549bf3..307fb7fa441 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1436,7 +1436,7 @@  See RS/6000 and PowerPC Options.
 -mavx5124fmaps  -mavx512vnni  -mavx5124vnniw  -mprfchw  -mrdpid @gol
 -mrdseed  -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
 -mamx-tile  -mamx-int8  -mamx-bf16 -muintr -mhreset -mavxvnni@gol
--mavx512fp16 -mavxifma -mavxvnniint8 @gol
+-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert @gol
 -mcldemote  -mms-bitfields  -mno-align-stringops  -minline-all-stringops @gol
 -minline-stringops-dynamically  -mstringop-strategy=@var{alg} @gol
 -mkl -mwidekl @gol
@@ -32899,6 +32899,9 @@  preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @need 200
 @itemx -mavxvnniint8
 @opindex mavxvnniint8
+@need 200
+@itemx -mavxneconvert
+@opindex mavxneconvert
 These switches enable the use of instructions in the MMX, SSE,
 SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
 AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
@@ -32909,8 +32912,8 @@  XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
 GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
 ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
 UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
-AVXIFMA, AVXVNNIINT8 or CLDEMOTE extended instruction sets. Each has a
-corresponding @option{-mno-} option to disable use of these instructions.
+AVXIFMA, AVXVNNIINT8, AVXNECONVERT or CLDEMOTE extended instruction sets. Each
+has a corresponding @option{-mno-} option to disable use of these instructions.
 
 These extensions are also available as built-in functions: see
 @ref{x86 Built-in Functions}, for details of the functions enabled and
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index e21a1d381e0..a12175b6498 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2493,6 +2493,9 @@  Target supports the execution of @code{avx512vp2intersect} instructions.
 @item avxifma
 Target supports the execution of @code{avxifma} instructions.
 
+@item avxneconvert
+Target supports the execution of @code{avxneconvert} instructions.
+
 @item avxvnniint8
 Target supports the execution of @code{avxvnniint8} instructions.
 
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index ebd01fe47bc..dd3e71f25ed 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt  -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt  -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index b66498f1d4c..cd7045cc4e4 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/gcc.target/i386/avx-check.h b/gcc/testsuite/gcc.target/i386/avx-check.h
index 77507ca2edc..666eff50780 100644
--- a/gcc/testsuite/gcc.target/i386/avx-check.h
+++ b/gcc/testsuite/gcc.target/i386/avx-check.h
@@ -28,6 +28,9 @@  main ()
 #endif
 #ifdef AVXVNNIINT8
       && __builtin_cpu_supports ("avxvnniint8")
+#endif
+#ifdef AVXNECONVERT
+      && __builtin_cpu_supports ("avxneconvert")
 #endif
       )
     {
diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-1.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-1.c
new file mode 100644
index 00000000000..b1848037e81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-1.c
@@ -0,0 +1,45 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavxneconvert -O2" } */
+/* { dg-final { scan-assembler-times "vbcstnebf162ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vbcstnebf162ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vbcstnesh2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vbcstnesh2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneebf162ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneebf162ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneeph2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneeph2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneobf162ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneobf162ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneoph2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneoph2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+#include <immintrin.h>
+
+volatile __m128 x1;
+volatile __m256 x2;
+volatile __m128bf16 res1, res2;
+const void *a;
+__m128bf16 *b;
+__m256bf16 *c;
+__m128h *d;
+__m256h *e;
+
+void extern
+avx_ne_convert_test (void)
+{
+  x1 = _mm_bcstnebf16_ps (a);
+  x2 = _mm256_bcstnebf16_ps (a);
+  x1 = _mm_bcstnesh_ps (a);
+  x2 = _mm256_bcstnesh_ps (a);
+  x1 = _mm_cvtneebf16_ps (b);
+  x2 = _mm256_cvtneebf16_ps (c);
+  x1 = _mm_cvtneeph_ps (d);
+  x2 = _mm256_cvtneeph_ps (e);
+  x1 = _mm_cvtneobf16_ps (b);
+  x2 = _mm256_cvtneobf16_ps (c);
+  x1 = _mm_cvtneoph_ps (d);
+  x2 = _mm256_cvtneoph_ps (e);
+  res1 = _mm_cvtneps_avx_pbh (x1);
+  res2 = _mm256_cvtneps_avx_pbh (x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c
new file mode 100644
index 00000000000..2707c58f7cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c
@@ -0,0 +1,54 @@ 
+/* { dg-do run } */
+/* { dg-options "-mavxneconvert -O2" } */
+/* { dg-require-effective-target avxneconvert } */
+#define AVXNECONVERT
+#include <stdint.h>
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+typedef union
+{
+  uint32_t int32;
+  float flt;
+} float_int_t;
+
+static uint16_t convert_fp32_to_bf16 (float fp)
+{
+  float_int_t fi;
+  fi.flt = fp;
+  return ((fi.int32 >> 16) & 0xffff);
+}
+
+void TEST (void)
+{
+  union128 dst_128;
+  union256 dst_256;
+  float res_ref_128[4], res_ref_256[8], fp32;
+  uint16_t var;
+  fp32 = (float) 3 * 2 + 5.5;
+  for (int i = 0; i < 4; i++)
+  {
+    res_ref_128[i] = fp32;
+    dst_128.a[i] = 117;
+  }
+  for (int i = 0; i < 8; i++)
+  {
+    res_ref_256[i] = fp32;
+    dst_256.a[i] = 117;
+  }
+  var = convert_fp32_to_bf16 (fp32);
+  dst_128.x = _mm_bcstnebf16_ps (&var);
+  dst_256.x = _mm256_bcstnebf16_ps (&var);
+  if (check_union128 (dst_128, res_ref_128))
+    abort();
+  if (check_union256 (dst_256, res_ref_256))
+    abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c
new file mode 100644
index 00000000000..0e6f38334b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+/* { dg-options "-mavxneconvert -mf16c -O2" } */
+/* { dg-require-effective-target avxneconvert } */
+#define AVXNECONVERT
+#include <stdint.h>
+#include <immintrin.h>
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+void TEST (void)
+{
+  union128 dst_128;
+  union256 dst_256;
+  float res_ref_128[4], res_ref_256[8], fp32;
+  uint16_t var;
+  fp32 = (float) 3 * 2 + 8.5;
+  for (int i = 0; i < 4; i++)
+  {
+    res_ref_128[i] = fp32;
+    dst_128.a[i] = 117;
+  }
+  for (int i = 0; i < 8; i++)
+  {
+    res_ref_256[i] = fp32;
+    dst_256.a[i] = 117;
+  }
+  var = _cvtss_sh (fp32, 0);
+  dst_128.x = _mm_bcstnesh_ps (&var);
+  dst_256.x = _mm256_bcstnesh_ps (&var);
+  if (check_union128 (dst_128, res_ref_128))
+    abort();
+  if (check_union256 (dst_256, res_ref_256))
+    abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c
new file mode 100644
index 00000000000..c80f3fdedec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c
@@ -0,0 +1,73 @@ 
+/* { dg-do run } */
+/* { dg-options "-mavxneconvert -O2" } */
+/* { dg-require-effective-target avxneconvert } */
+#define AVXNECONVERT
+#include <stdint.h>
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+typedef union
+{
+  uint32_t int32;
+  float flt;
+} float_int_t;
+
+typedef union
+{
+  __m128bf16 x;
+  uint32_t a[4];
+} union128bf16_i;
+
+typedef union
+{
+  __m256bf16 x;
+  uint32_t a[8];
+} union256bf16_i;
+
+static uint16_t convert_fp32_to_bf16 (float fp)
+{
+  float_int_t fi;
+  fi.flt = fp;
+  return ((fi.int32 >> 16) & 0xffff);
+}
+
+void TEST (void)
+{
+  union128 dst_128;
+  union256 dst_256;
+  float res_ref_128[4], res_ref_256[8], fp32;
+  uint16_t bf16;
+  union128bf16_i src_128bh;
+  union256bf16_i src_256bh;
+
+  for (int i = 0; i < 4; i++)
+  {
+    fp32 = (float) 3 * i + 5 + i * 0.5;
+    bf16 = convert_fp32_to_bf16 (fp32);
+    src_128bh.a[i] = bf16; // store bf16 at the lower part of the dword
+    res_ref_128[i] = fp32;
+    dst_128.a[i] = 117;
+  }
+  for (int i = 0; i < 8; i++)
+  {
+    fp32 = (float) 3 * i + 5 + i * 0.5;
+    bf16 = convert_fp32_to_bf16 (fp32);
+    src_256bh.a[i] = bf16; // store bf16 at the lower part of the dword
+    res_ref_256[i] = fp32;
+    dst_256.a[i] = 117;
+  }
+  dst_128.x = _mm_cvtneebf16_ps (&src_128bh.x);
+  dst_256.x = _mm256_cvtneebf16_ps (&src_256bh.x);
+  if (check_union128 (dst_128, res_ref_128))
+    abort();
+  if (check_union256 (dst_256, res_ref_256))
+    abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c
new file mode 100644
index 00000000000..a862894746d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c
@@ -0,0 +1,66 @@ 
+/* { dg-do run } */
+/* { dg-options "-mavxneconvert -mf16c -O2" } */
+/* { dg-require-effective-target avxneconvert } */
+#define AVXNECONVERT
+#include <stdint.h>
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+typedef union
+{
+  uint32_t int32;
+  float flt;
+} float_int_t;
+
+typedef union
+{
+  __m128h x;
+  uint32_t a[4];
+} union128h;
+
+typedef union
+{
+  __m256h x;
+  uint32_t a[8];
+} union256h;
+
+void TEST (void)
+{
+  union128 dst_128;
+  union256 dst_256;
+  float res_ref_128[4], res_ref_256[8], fp32;
+  uint16_t fp16;
+  union128h src_128h;
+  union256h src_256h;
+
+  for (int i = 0; i < 4; i++)
+  {
+    fp32 = (float) 3 * i + 5 + i * 0.5;
+    fp16 = _cvtss_sh (fp32, 0);
+    src_128h.a[i] = fp16;
+    res_ref_128[i] = fp32;
+    dst_128.a[i] = 117;
+  }
+  for (int i = 0; i < 8; i++)
+  {
+    fp32 = (float) 3 * i + 5 + i * 0.5;
+    fp16 = _cvtss_sh (fp32, 0);
+    src_256h.a[i] = fp16;
+    res_ref_256[i] = fp32;
+    dst_256.a[i] = 117;
+  }
+  dst_128.x = _mm_cvtneeph_ps (&src_128h.x);
+  dst_256.x = _mm256_cvtneeph_ps (&src_256h.x);
+  if (check_union128 (dst_128, res_ref_128))
+    abort();
+  if (check_union256 (dst_256, res_ref_256))
+    abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c
new file mode 100644
index 00000000000..d95aee067ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c
@@ -0,0 +1,75 @@ 
+/* { dg-do run } */
+/* { dg-options "-mavxneconvert -O2" } */
+/* { dg-require-effective-target avxneconvert } */
+#define AVXNECONVERT
+#include <stdint.h>
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+typedef union
+{
+  uint32_t int32;
+  float flt;
+} float_int_t;
+
+typedef union
+{
+  __m128bf16 x;
+  uint32_t a[4];
+} union128bf16_i;
+
+typedef union
+{
+  __m256bf16 x;
+  uint32_t a[8];
+} union256bf16_i;
+
+static uint16_t convert_fp32_to_bf16 (float fp)
+{
+  float_int_t fi;
+  fi.flt = fp;
+  return ((fi.int32 >> 16) & 0xffff);
+}
+
+void TEST (void)
+{
+  union128 dst_128;
+  union256 dst_256;
+  float res_ref_128[4], res_ref_256[8], fp32;
+  uint16_t bf16;
+  union128bf16_i src_128bh;
+  union256bf16_i src_256bh;
+
+  for (int i = 0; i < 4; i++)
+  {
+    fp32 = (float) 3 * i + 5 + i * 0.5;
+    bf16 = convert_fp32_to_bf16 (fp32);
+    // store bf16 at the upper part of the dword
+    src_128bh.a[i] = (bf16 << 16) & 0xffff0000;
+    res_ref_128[i] = fp32;
+    dst_128.a[i] = 117;
+  }
+  for (int i = 0; i < 8; i++)
+  {
+    fp32 = (float) 3 * i + 5 + i * 0.5;
+    bf16 = convert_fp32_to_bf16 (fp32);
+    // store bf16 at the upper part of the dword
+    src_256bh.a[i] = (bf16 << 16) & 0xffff0000;
+    res_ref_256[i] = fp32;
+    dst_256.a[i] = 117;
+  }
+  dst_128.x = _mm_cvtneobf16_ps (&src_128bh.x);
+  dst_256.x = _mm256_cvtneobf16_ps (&src_256bh.x);
+  if (check_union128 (dst_128, res_ref_128))
+    abort();
+  if (check_union256 (dst_256, res_ref_256))
+    abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c
new file mode 100644
index 00000000000..95eb5d74765
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c
@@ -0,0 +1,66 @@ 
+/* { dg-do run } */
+/* { dg-options "-mavxneconvert -mf16c -O2" } */
+/* { dg-require-effective-target avxneconvert } */
+#define AVXNECONVERT
+#include <stdint.h>
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+typedef union
+{
+  uint32_t int32;
+  float flt;
+} float_int_t;
+
+typedef union
+{
+  __m128h x;
+  uint32_t a[4];
+} union128h;
+
+typedef union
+{
+  __m256h x;
+  uint32_t a[8];
+} union256h;
+
+void TEST (void)
+{
+  union128 dst_128;
+  union256 dst_256;
+  float res_ref_128[4], res_ref_256[8], fp32;
+  uint16_t fp16;
+  union128h src_128h;
+  union256h src_256h;
+
+  for (int i = 0; i < 4; i++)
+  {
+    fp32 = (float) 3 * i + 5 + i * 0.5;
+    fp16 = _cvtss_sh (fp32, 0);
+    src_128h.a[i] = fp16 << 16;
+    res_ref_128[i] = fp32;
+    dst_128.a[i] = 117;
+  }
+  for (int i = 0; i < 8; i++)
+  {
+    fp32 = (float) 3 * i + 5 + i * 0.5;
+    fp16 = _cvtss_sh (fp32, 0);
+    src_256h.a[i] = fp16 << 16;
+    res_ref_256[i] = fp32;
+    dst_256.a[i] = 117;
+  }
+  dst_128.x = _mm_cvtneoph_ps (&src_128h.x);
+  dst_256.x = _mm256_cvtneoph_ps (&src_256h.x);
+  if (check_union128 (dst_128, res_ref_128))
+    abort();
+  if (check_union256 (dst_256, res_ref_256))
+    abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c
new file mode 100644
index 00000000000..0861521111a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c
@@ -0,0 +1,58 @@ 
+/* { dg-do run } */
+/* { dg-options "-mavxneconvert -O2" } */
+/* { dg-require-effective-target avxneconvert } */
+#define AVXNECONVERT
+#include <stdint.h>
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+typedef union
+{
+  uint32_t int32;
+  float flt;
+} float_int_t;
+
+typedef union
+{
+  __m128bf16  x;
+  unsigned short a[8];
+} union128bf16;
+
+void TEST (void)
+{
+  union128 src_128;
+  union256 src_256;
+  union128bf16 dst_128, dst_256;
+  uint16_t res_ref_128[8] = {0}, res_ref_256[8];
+  float_int_t fp32;
+  for (int i = 0; i < 4; i++)
+  {
+    fp32.flt = (float) 2 * i + 7 + i * 0.25;
+    src_128.a[i] = fp32.flt;
+    res_ref_128[i] = fp32.int32 >> 16;
+    dst_128.a[i] = 117;
+  }
+
+  for (int i = 0; i < 8; i++)
+  {
+    fp32.flt = (float) 2 * i + 7 + i * 0.25;
+    src_256.a[i] = fp32.flt;
+    res_ref_256[i] = fp32.int32 >> 16;
+    dst_256.a[i] = 117;
+  }
+  dst_128.x = _mm_cvtneps_avx_pbh (src_128.x);
+  dst_256.x = _mm256_cvtneps_avx_pbh (src_256.x);
+
+  if (checkVus (dst_128.a, res_ref_128, 8))
+    abort();
+  if (checkVus (dst_128.a, res_ref_128, 8))
+    abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1a.c
similarity index 100%
rename from gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c
rename to gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1a.c
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c
new file mode 100644
index 00000000000..8b5d6a644bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -mavx512vl -mavxneconvert -O2" } */
+/* { dg-final { scan-assembler-times "\{vex\} vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128bh res1, res2;
+volatile __m128 x1;
+volatile __m256 x2;
+volatile __mmask8 m8;
+
+void extern
+avx512bf16_test (void)
+{
+  res2 = _mm256_cvtneps_pbh (x2);
+  res2 = _mm256_mask_cvtneps_pbh (res2, m8, x2);
+  res2 = _mm256_maskz_cvtneps_pbh (m8, x2);
+
+  res1 = _mm_cvtneps_pbh (x1);
+  res1 = _mm_mask_cvtneps_pbh (res1, m8, x1);
+  res1 = _mm_maskz_cvtneps_pbh (m8, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
index a681bffe3e7..b3d33df7c9c 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -82,6 +82,7 @@  extern void test_avxvnni (void)			__attribute__((__target__("avxvnni")));
 extern void test_avx512fp16 (void)		__attribute__((__target__("avx512fp16")));
 extern void test_avxifma (void)			__attribute__((__target__("avxifma")));
 extern void test_avxvnniint8 (void)		__attribute__((__target__("avxvnniint8")));
+extern void test_avxneconvert (void)		__attribute__((__target__("avxneconvert")));
 
 extern void test_no_sgx (void)			__attribute__((__target__("no-sgx")));
 extern void test_no_avx5124fmaps(void)		__attribute__((__target__("no-avx5124fmaps")));
@@ -165,6 +166,7 @@  extern void test_no_avxvnni (void)		__attribute__((__target__("no-avxvnni")));
 extern void test_no_avx512fp16 (void)		__attribute__((__target__("no-avx512fp16")));
 extern void test_no_avxifma (void)		__attribute__((__target__("no-avxifma")));
 extern void test_no_avxvnniint8 (void)		__attribute__((__target__("no-avxvnniint8")));
+extern void test_no_avxneconvert (void)		__attribute__((__target__("no-avxneconvert")));
 
 extern void test_arch_nocona (void)		__attribute__((__target__("arch=nocona")));
 extern void test_arch_core2 (void)		__attribute__((__target__("arch=core2")));
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index ddde2df6657..3eabc49a6ab 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@ 
    popcntintrin.h gfniintrin.h and mm_malloc.h are usable
    with -O -std=c89 -pedantic-errors.  */
 /* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert" } */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 2b293216c6f..b9cdfb690d1 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
 /* { dg-add-options bind_pic_locally } */
 
 #include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 78b51048b90..b6ee3806dcc 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */
 /* { dg-add-options bind_pic_locally } */
 
 #include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index cc1c8cfa4be..71ac0f3da19 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -103,7 +103,7 @@ 
 
 
 #ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert")
 #endif
 
 /* Following intrinsics require immediate arguments.  They
@@ -220,7 +220,7 @@  test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
 
 /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
 #ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert")
 #endif
 #include <immintrin.h>
 test_1 (_cvtss_sh, unsigned short, float, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 270f4483491..898dde80c8f 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -843,6 +843,6 @@ 
 #define __builtin_ia32_vpclmulqdq_v2di(A, B, C)  __builtin_ia32_vpclmulqdq_v2di(A, B, 1) 
 #define __builtin_ia32_vpclmulqdq_v8di(A, B, C)  __builtin_ia32_vpclmulqdq_v8di(A, B, 1) 
 
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert")
 
 #include <x86intrin.h>
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 64ccfc746bd..9228e810c45 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9530,6 +9530,18 @@  proc check_effective_target_avxvnniint8 { } {
     } "-O0 -mavxvnniint8" ]
 }
 
+# Return 1 if avxneconvert instructions can be compiled.
+proc check_effective_target_avxneconvert { } {
+    return [check_no_compiler_messages avxneconvert object {
+	typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+	__m128
+	_mm_bcstnebf16_ps (const void *__P)
+	{
+	   return (__m128) __builtin_ia32_vbcstnebf162ps128 ((const short *) __P);
+	}
+    } "-O0 -mavxneconvert" ]
+}
+
 # Return 1 if sse instructions can be compiled.
 proc check_effective_target_sse { } {
     return [check_no_compiler_messages sse object {