[x86] Enable V4BFmode and V2BFmode.

Message ID 20221026022847.2932438-1-hongtao.liu@intel.com
State Accepted
Headers
Series [x86] Enable V4BFmode and V2BFmode. |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

liuhongt Oct. 26, 2022, 2:28 a.m. UTC
  Enable V4BFmode and V2BFmode with the same ABI as V4HFmode and
V2HFmode. No real operation is supported for them except for movement.
This should solve PR target/107261.

Also, I noticed a redundant V2HFmode entry in VALID_AVX512FP16_REG_MODE,
and this patch removes it.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	PR target/107261
	* config/i386/i386-modes.def (VECTOR_MODE): Support V2BFmode.
	* config/i386/i386.cc (classify_argument): Handle V4BFmode and
	V2BFmode.
	(ix86_convert_const_vector_to_integer): Ditto.
	* config/i386/i386.h (VALID_AVX512FP16_REG_MODE): Remove
	V2HFmode.
	(VALID_SSE2_REG_MODE): Add V4BFmode and V2BFmode.
	(VALID_MMX_REG_MODE): Add V4BFmode.
	* config/i386/i386.md (mode): Add V4BF and V2BF.
	(MODE_SIZE): Ditto.
	* config/i386/mmx.md (MMXMODE): Add V4BF.
	(V_32): Add V2BF.
	(V_16_32_64): Add V4BF.
	(mmxinsnmode): Add V4BF and V2BF.
	(*mov<mode>_internal): Handle V4BFmode and V2BFmode.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr107261.c: New test.
---
 gcc/config/i386/i386-modes.def           |  1 +
 gcc/config/i386/i386.cc                  |  6 ++++
 gcc/config/i386/i386.h                   |  9 +++---
 gcc/config/i386/i386.md                  |  5 ++--
 gcc/config/i386/mmx.md                   | 26 +++++++++-------
 gcc/testsuite/gcc.target/i386/pr107261.c | 38 ++++++++++++++++++++++++
 6 files changed, 68 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107261.c
  

Comments

Hongtao Liu Oct. 28, 2022, 2:58 a.m. UTC | #1
I'm going to check in this patch.

On Wed, Oct 26, 2022 at 10:30 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Enable V4BFmode and V2BFmode with the same ABI as V4HFmode and
> V2HFmode. No real operation is supported for them except for movement.
> This should solve PR target/107261.
>
> Also, I noticed a redundant V2HFmode entry in VALID_AVX512FP16_REG_MODE,
> and this patch removes it.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/107261
>         * config/i386/i386-modes.def (VECTOR_MODE): Support V2BFmode.
>         * config/i386/i386.cc (classify_argument): Handle V4BFmode and
>         V2BFmode.
>         (ix86_convert_const_vector_to_integer): Ditto.
>         * config/i386/i386.h (VALID_AVX512FP16_REG_MODE): Remove
>         V2HFmode.
>         (VALID_SSE2_REG_MODE): Add V4BFmode and V2BFmode.
>         (VALID_MMX_REG_MODE): Add V4BFmode.
>         * config/i386/i386.md (mode): Add V4BF and V2BF.
>         (MODE_SIZE): Ditto.
>         * config/i386/mmx.md (MMXMODE): Add V4BF.
>         (V_32): Add V2BF.
>         (V_16_32_64): Add V4BF.
>         (mmxinsnmode): Add V4BF and V2BF.
>         (*mov<mode>_internal): Handle V4BFmode and V2BFmode.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr107261.c: New test.
> ---
>  gcc/config/i386/i386-modes.def           |  1 +
>  gcc/config/i386/i386.cc                  |  6 ++++
>  gcc/config/i386/i386.h                   |  9 +++---
>  gcc/config/i386/i386.md                  |  5 ++--
>  gcc/config/i386/mmx.md                   | 26 +++++++++-------
>  gcc/testsuite/gcc.target/i386/pr107261.c | 38 ++++++++++++++++++++++++
>  6 files changed, 68 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107261.c
>
> diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
> index b49daaef253..dbc3165c5fc 100644
> --- a/gcc/config/i386/i386-modes.def
> +++ b/gcc/config/i386/i386-modes.def
> @@ -93,6 +93,7 @@ VECTOR_MODES (FLOAT, 64);     /*  V32HF V16SF V8DF V4TF */
>  VECTOR_MODES (FLOAT, 128);    /* V64HF V32SF V16DF V8TF */
>  VECTOR_MODES (FLOAT, 256);    /* V128HF V64SF V32DF V16TF */
>  VECTOR_MODE (FLOAT, HF, 2);   /*                  V2HF */
> +VECTOR_MODE (FLOAT, BF, 2);   /*                  V2BF */
>  VECTOR_MODE (FLOAT, HF, 6);   /*                  V6HF */
>  VECTOR_MODE (INT, TI, 1);     /*                   V1TI */
>  VECTOR_MODE (INT, DI, 1);     /*                   V1DI */
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index aeea26ef4be..1aca7d55a09 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -2507,7 +2507,9 @@ classify_argument (machine_mode mode, const_tree type,
>      case E_V2SImode:
>      case E_V4HImode:
>      case E_V4HFmode:
> +    case E_V4BFmode:
>      case E_V2HFmode:
> +    case E_V2BFmode:
>      case E_V8QImode:
>        classes[0] = X86_64_SSE_CLASS;
>        return 1;
> @@ -2991,6 +2993,7 @@ pass_in_reg:
>      case E_V8QImode:
>      case E_V4HImode:
>      case E_V4HFmode:
> +    case E_V4BFmode:
>      case E_V2SImode:
>      case E_V2SFmode:
>      case E_V1TImode:
> @@ -3240,6 +3243,7 @@ pass_in_reg:
>      case E_V8QImode:
>      case E_V4HImode:
>      case E_V4HFmode:
> +    case E_V4BFmode:
>      case E_V2SImode:
>      case E_V2SFmode:
>      case E_V1TImode:
> @@ -15810,7 +15814,9 @@ ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
>         }
>        break;
>      case E_V2HFmode:
> +    case E_V2BFmode:
>      case E_V4HFmode:
> +    case E_V4BFmode:
>      case E_V2SFmode:
>        for (int i = 0; i < nunits; ++i)
>         {
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index fd7c9df47e5..16d9c606077 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1033,13 +1033,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
>     || (MODE) == V8BFmode || (MODE) == TImode)
>
>  #define VALID_AVX512FP16_REG_MODE(MODE)                                        \
> -  ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode    \
> -   || (MODE) == V2HFmode)
> +  ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
>
>  #define VALID_SSE2_REG_MODE(MODE)                                      \
>    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode     \
>     || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode   \
> -   || (MODE) == V8BFmode \
> +   || (MODE) == V8BFmode || (MODE) == V4BFmode || (MODE) == V2BFmode   \
>     || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode   \
>     || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode     \
>     || (MODE) == HFmode || (MODE) == BFmode)
> @@ -1057,7 +1056,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
>    ((MODE) == V1DImode || (MODE) == DImode                              \
>     || (MODE) == V2SImode || (MODE) == SImode                           \
>     || (MODE) == V4HImode || (MODE) == V8QImode                         \
> -   || (MODE) == V4HFmode)
> +   || (MODE) == V4HFmode || (MODE) == V4BFmode)
>
>  #define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
>
> @@ -1074,7 +1073,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
>     || (MODE) == CSImode || (MODE) == CDImode                           \
>     || (MODE) == SDmode || (MODE) == DDmode                             \
>     || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode         \
> -   || (MODE) == V2HImode || (MODE) == V2HFmode                         \
> +   || (MODE) == V2HImode || (MODE) == V2HFmode || (MODE) == V2BFmode   \
>     || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode   \
>     || (TARGET_64BIT                                                    \
>         && ((MODE) == TImode || (MODE) == CTImode                       \
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index baf1f1f8fa2..e46554efea8 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -500,7 +500,7 @@ (define_attr "type"
>  ;; Main data type used by the insn
>  (define_attr "mode"
>    "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
> -   V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
> +   V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
>    (const_string "unknown"))
>
>  ;; The CPU unit operations uses.
> @@ -1119,7 +1119,8 @@ (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
>                              (V4SF "16") (V8SF "32") (V16SF "64")
>                              (V8HF "16") (V16HF "32") (V32HF "64")
>                              (V4HF "8") (V2HF "4")
> -                            (V8BF "16") (V16BF "32") (V32BF "64")])
> +                            (V8BF "16") (V16BF "32") (V32BF "64")
> +                            (V4BF "8") (V2BF "4")])
>
>  ;; Double word integer modes as mode attribute.
>  (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index c359e2dd6de..d5134cc351e 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -48,7 +48,7 @@ (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
>  (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
>
>  ;; All 8-byte vector modes handled by MMX
> -(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF])
> +(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF V4BF])
>  (define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
>
>  ;; Mix-n-match
> @@ -58,7 +58,7 @@ (define_mode_iterator MMXMODE24 [V4HI V2SI])
>  (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
>
>  ;; All 4-byte integer/float16 vector modes
> -(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF])
> +(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
>
>  ;; 4-byte integer vector modes
>  (define_mode_iterator VI_32 [V4QI V2HI])
> @@ -72,7 +72,8 @@ (define_mode_iterator VI1_16_32 [V4QI V2QI])
>  ;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
>  (define_mode_iterator V_16_32_64
>     [V2QI V4QI V2HI V2HF
> -    (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
> +    (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
> +    (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
>      (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
>
>  ;; V2S* modes
> @@ -92,6 +93,7 @@ (define_mode_attr mmxinsnmode
>     (V4HI "DI") (V2HI "SI")
>     (V2SI "DI")
>     (V4HF "DI") (V2HF "SI")
> +   (V4BF "DI") (V2BF "SI")
>     (V2SF "DI")])
>
>  (define_mode_attr mmxdoublemode
> @@ -213,9 +215,9 @@ (define_insn "*mov<mode>_internal"
>       (cond [(eq_attr "alternative" "2")
>               (const_string "SI")
>             (eq_attr "alternative" "11,12")
> -             (cond [(match_test "<MODE>mode == V2SFmode")
> -                      (const_string "V4SF")
> -                    (match_test "<MODE>mode == V4HFmode")
> +             (cond [(match_test "<MODE>mode == V2SFmode
> +                                 || <MODE>mode == V4HFmode
> +                                 || <MODE>mode == V4BFmode")
>                        (const_string "V4SF")
>                      (ior (not (match_test "TARGET_SSE2"))
>                           (match_test "optimize_function_for_size_p (cfun)"))
> @@ -227,13 +229,15 @@ (define_insn "*mov<mode>_internal"
>                  (ior (ior (and (match_test "<MODE>mode == V2SFmode")
>                                 (not (match_test "TARGET_MMX_WITH_SSE")))
>                            (not (match_test "TARGET_SSE2")))
> -                     (match_test "<MODE>mode == V4HFmode")))
> +                     (match_test "<MODE>mode == V4HFmode
> +                                 || <MODE>mode == V4BFmode")))
>               (const_string "V2SF")
>
>             (and (eq_attr "alternative" "14")
>                  (ior (ior (match_test "<MODE>mode == V2SFmode")
>                            (not (match_test "TARGET_SSE2")))
> -                     (match_test "<MODE>mode == V4HFmode")))
> +                     (match_test "<MODE>mode == V4HFmode
> +                                 || <MODE>mode == V4BFmode")))
>               (const_string "V2SF")
>            ]
>            (const_string "DI")))
> @@ -321,7 +325,8 @@ (define_insn "*mov<mode>_internal"
>         (const_string "*")))
>     (set (attr "mode")
>       (cond [(eq_attr "alternative" "2,3")
> -             (cond [(match_test "<MODE>mode == V2HFmode")
> +             (cond [(match_test "<MODE>mode == V2HFmode
> +                                || <MODE>mode == V2BFmode")
>                        (const_string "V4SF")
>                      (match_test "TARGET_AVX")
>                        (const_string "TI")
> @@ -332,7 +337,8 @@ (define_insn "*mov<mode>_internal"
>                     (const_string "TI"))
>
>             (and (eq_attr "alternative" "4,5")
> -                (ior (match_test "<MODE>mode == V2HFmode")
> +                (ior (match_test "<MODE>mode == V2HFmode
> +                                || <MODE>mode == V2BFmode")
>                       (not (match_test "TARGET_SSE2"))))
>               (const_string "SF")
>            ]
> diff --git a/gcc/testsuite/gcc.target/i386/pr107261.c b/gcc/testsuite/gcc.target/i386/pr107261.c
> new file mode 100644
> index 00000000000..eb1d232fbfc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107261.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2" } */
> +
> +typedef __bf16 v4bf __attribute__ ((vector_size (8)));
> +typedef __bf16 v2bf __attribute__ ((vector_size (4)));
> +
> +v4bf
> +v4bf_abi_1 (v4bf a)
> +{
> +  return a;
> +}
> +
> +v4bf
> +v4bf_abi_3 (v4bf a, v4bf b, v4bf c)
> +{
> +  return c;
> +}
> +
> +/* { dg-final { scan-assembler-times "movq\[\\t \]*%mm2, %mm0" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm2, %xmm0" 1 { target { ! ia32 } } } } */
> +
> +v4bf
> +v4bf_abi_4 (v4bf a, v4bf b, v4bf c, v4bf d)
> +{
> +  return d;
> +}
> +
> +/* { dg-final { scan-assembler-times "movq\[\\t \]*4\\(%esp\\), %mm0" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm3, %xmm0" 1 { target { ! ia32 } } } } */
> +
> +v2bf
> +v2bf_test (v2bf a, v2bf b, v2bf c, v2bf d)
> +{
> +  return b;
> +}
> +
> +/* { dg-final { scan-assembler-times "movl\[\\t \]*8\\(%esp\\), %eax" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm1, %xmm0" 1 { target { ! ia32 } } } } */
> --
> 2.27.0
>
  

Patch

diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index b49daaef253..dbc3165c5fc 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -93,6 +93,7 @@  VECTOR_MODES (FLOAT, 64);     /*  V32HF V16SF V8DF V4TF */
 VECTOR_MODES (FLOAT, 128);    /* V64HF V32SF V16DF V8TF */
 VECTOR_MODES (FLOAT, 256);    /* V128HF V64SF V32DF V16TF */
 VECTOR_MODE (FLOAT, HF, 2);   /* 	      	   V2HF */
+VECTOR_MODE (FLOAT, BF, 2);   /* 	      	   V2BF */
 VECTOR_MODE (FLOAT, HF, 6);   /*		   V6HF */
 VECTOR_MODE (INT, TI, 1);     /*                   V1TI */
 VECTOR_MODE (INT, DI, 1);     /*                   V1DI */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index aeea26ef4be..1aca7d55a09 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2507,7 +2507,9 @@  classify_argument (machine_mode mode, const_tree type,
     case E_V2SImode:
     case E_V4HImode:
     case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2HFmode:
+    case E_V2BFmode:
     case E_V8QImode:
       classes[0] = X86_64_SSE_CLASS;
       return 1;
@@ -2991,6 +2993,7 @@  pass_in_reg:
     case E_V8QImode:
     case E_V4HImode:
     case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2SImode:
     case E_V2SFmode:
     case E_V1TImode:
@@ -3240,6 +3243,7 @@  pass_in_reg:
     case E_V8QImode:
     case E_V4HImode:
     case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2SImode:
     case E_V2SFmode:
     case E_V1TImode:
@@ -15810,7 +15814,9 @@  ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
 	}
       break;
     case E_V2HFmode:
+    case E_V2BFmode:
     case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2SFmode:
       for (int i = 0; i < nunits; ++i)
 	{
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index fd7c9df47e5..16d9c606077 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1033,13 +1033,12 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V8BFmode || (MODE) == TImode)
 
 #define VALID_AVX512FP16_REG_MODE(MODE)					\
-  ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode	\
-   || (MODE) == V2HFmode)
+  ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
 
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
    || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode	\
-   || (MODE) == V8BFmode \
+   || (MODE) == V8BFmode || (MODE) == V4BFmode || (MODE) == V2BFmode	\
    || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode	\
    || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode	\
    || (MODE) == HFmode || (MODE) == BFmode)
@@ -1057,7 +1056,7 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
   ((MODE) == V1DImode || (MODE) == DImode				\
    || (MODE) == V2SImode || (MODE) == SImode				\
    || (MODE) == V4HImode || (MODE) == V8QImode				\
-   || (MODE) == V4HFmode)
+   || (MODE) == V4HFmode || (MODE) == V4BFmode)
 
 #define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
 
@@ -1074,7 +1073,7 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == CSImode || (MODE) == CDImode				\
    || (MODE) == SDmode || (MODE) == DDmode				\
    || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode		\
-   || (MODE) == V2HImode || (MODE) == V2HFmode				\
+   || (MODE) == V2HImode || (MODE) == V2HFmode || (MODE) == V2BFmode	\
    || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode	\
    || (TARGET_64BIT							\
        && ((MODE) == TImode || (MODE) == CTImode			\
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index baf1f1f8fa2..e46554efea8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -500,7 +500,7 @@  (define_attr "type"
 ;; Main data type used by the insn
 (define_attr "mode"
   "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
-   V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
+   V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
   (const_string "unknown"))
 
 ;; The CPU unit operations uses.
@@ -1119,7 +1119,8 @@  (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
 			     (V4SF "16") (V8SF "32") (V16SF "64")
 			     (V8HF "16") (V16HF "32") (V32HF "64")
 			     (V4HF "8") (V2HF "4")
-			     (V8BF "16") (V16BF "32") (V32BF "64")])
+			     (V8BF "16") (V16BF "32") (V32BF "64")
+			     (V4BF "8") (V2BF "4")])
 
 ;; Double word integer modes as mode attribute.
 (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c359e2dd6de..d5134cc351e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -48,7 +48,7 @@  (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
 (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
-(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF])
+(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF V4BF])
 (define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
 
 ;; Mix-n-match
@@ -58,7 +58,7 @@  (define_mode_iterator MMXMODE24 [V4HI V2SI])
 (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
 
 ;; All 4-byte integer/float16 vector modes
-(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF])
+(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
 
 ;; 4-byte integer vector modes
 (define_mode_iterator VI_32 [V4QI V2HI])
@@ -72,7 +72,8 @@  (define_mode_iterator VI1_16_32 [V4QI V2QI])
 ;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
 (define_mode_iterator V_16_32_64
    [V2QI V4QI V2HI V2HF
-    (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
+    (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
+    (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
     (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
 
 ;; V2S* modes
@@ -92,6 +93,7 @@  (define_mode_attr mmxinsnmode
    (V4HI "DI") (V2HI "SI")
    (V2SI "DI")
    (V4HF "DI") (V2HF "SI")
+   (V4BF "DI") (V2BF "SI")
    (V2SF "DI")])
 
 (define_mode_attr mmxdoublemode
@@ -213,9 +215,9 @@  (define_insn "*mov<mode>_internal"
      (cond [(eq_attr "alternative" "2")
 	      (const_string "SI")
 	    (eq_attr "alternative" "11,12")
-	      (cond [(match_test "<MODE>mode == V2SFmode")
-		       (const_string "V4SF")
-		     (match_test "<MODE>mode == V4HFmode")
+	      (cond [(match_test "<MODE>mode == V2SFmode
+				  || <MODE>mode == V4HFmode
+				  || <MODE>mode == V4BFmode")
 		       (const_string "V4SF")
 		     (ior (not (match_test "TARGET_SSE2"))
 			  (match_test "optimize_function_for_size_p (cfun)"))
@@ -227,13 +229,15 @@  (define_insn "*mov<mode>_internal"
 		 (ior (ior (and (match_test "<MODE>mode == V2SFmode")
 				(not (match_test "TARGET_MMX_WITH_SSE")))
 			   (not (match_test "TARGET_SSE2")))
-		      (match_test "<MODE>mode == V4HFmode")))
+		      (match_test "<MODE>mode == V4HFmode
+				  || <MODE>mode == V4BFmode")))
 	      (const_string "V2SF")
 
 	    (and (eq_attr "alternative" "14")
 		 (ior (ior (match_test "<MODE>mode == V2SFmode")
 			   (not (match_test "TARGET_SSE2")))
-		      (match_test "<MODE>mode == V4HFmode")))
+		      (match_test "<MODE>mode == V4HFmode
+				  || <MODE>mode == V4BFmode")))
 	      (const_string "V2SF")
 	   ]
 	   (const_string "DI")))
@@ -321,7 +325,8 @@  (define_insn "*mov<mode>_internal"
        (const_string "*")))
    (set (attr "mode")
      (cond [(eq_attr "alternative" "2,3")
-	      (cond [(match_test "<MODE>mode == V2HFmode")
+	      (cond [(match_test "<MODE>mode == V2HFmode
+				 || <MODE>mode == V2BFmode")
 		       (const_string "V4SF")
 		     (match_test "TARGET_AVX")
 		       (const_string "TI")
@@ -332,7 +337,8 @@  (define_insn "*mov<mode>_internal"
 		    (const_string "TI"))
 
 	    (and (eq_attr "alternative" "4,5")
-		 (ior (match_test "<MODE>mode == V2HFmode")
+		 (ior (match_test "<MODE>mode == V2HFmode
+				 || <MODE>mode == V2BFmode")
 		      (not (match_test "TARGET_SSE2"))))
 	      (const_string "SF")
 	   ]
diff --git a/gcc/testsuite/gcc.target/i386/pr107261.c b/gcc/testsuite/gcc.target/i386/pr107261.c
new file mode 100644
index 00000000000..eb1d232fbfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107261.c
@@ -0,0 +1,38 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef __bf16 v4bf __attribute__ ((vector_size (8)));
+typedef __bf16 v2bf __attribute__ ((vector_size (4)));
+
+v4bf
+v4bf_abi_1 (v4bf a)
+{
+  return a;
+}
+
+v4bf
+v4bf_abi_3 (v4bf a, v4bf b, v4bf c)
+{
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "movq\[\\t \]*%mm2, %mm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm2, %xmm0" 1 { target { ! ia32 } } } } */
+
+v4bf
+v4bf_abi_4 (v4bf a, v4bf b, v4bf c, v4bf d)
+{
+  return d;
+}
+
+/* { dg-final { scan-assembler-times "movq\[\\t \]*4\\(%esp\\), %mm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm3, %xmm0" 1 { target { ! ia32 } } } } */
+
+v2bf
+v2bf_test (v2bf a, v2bf b, v2bf c, v2bf d)
+{
+  return b;
+}
+
+/* { dg-final { scan-assembler-times "movl\[\\t \]*8\\(%esp\\), %eax" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm1, %xmm0" 1 { target { ! ia32 } } } } */