[x86] Enable V4BFmode and V2BFmode.
Checks
Commit Message
Enable V4BFmode and V2BFmode with the same ABI as V4HFmode and
V2HFmode. No real operation is supported for them except for movement.
This should solve PR target/107261.
Also, I noticed that V2HFmode is redundant in VALID_AVX512FP16_REG_MODE,
so remove it from that macro.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?
gcc/ChangeLog:
PR target/107261
* config/i386/i386-modes.def (VECTOR_MODE): Support V2BFmode.
* config/i386/i386.cc (classify_argument): Handle V4BFmode and
V2BFmode.
(ix86_convert_const_vector_to_integer): Ditto.
* config/i386/i386.h (VALID_AVX512FP16_REG_MODE): Remove
V2HFmode.
(VALID_SSE2_REG_MODE): Add V4BFmode and V2BFmode.
(VALID_MMX_REG_MODE): Add V4BFmode.
* config/i386/i386.md (mode): Add V4BF and V2BF.
(MODE_SIZE): Ditto.
* config/i386/mmx.md (MMXMODE): Add V4BF.
(V_32): Add V2BF.
(V_16_32_64): Add V4BF and V2BF.
(mmxinsnmode): Add V4BF and V2BF.
(*mov<mode>_internal): Handle V4BFmode and V2BFmode.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr107261.c: New test.
---
gcc/config/i386/i386-modes.def | 1 +
gcc/config/i386/i386.cc | 6 ++++
gcc/config/i386/i386.h | 9 +++---
gcc/config/i386/i386.md | 5 ++--
gcc/config/i386/mmx.md | 26 +++++++++-------
gcc/testsuite/gcc.target/i386/pr107261.c | 38 ++++++++++++++++++++++++
6 files changed, 68 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr107261.c
Comments
I'm going to check in this patch.
On Wed, Oct 26, 2022 at 10:30 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Enable V4BFmode and V2BFmode with the same ABI as V4HFmode and
> V2HFmode. No real operation is supported for them except for movement.
> This should solve PR target/107261.
>
> Also, I noticed that V2HFmode is redundant in VALID_AVX512FP16_REG_MODE,
> so remove it from that macro.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
> PR target/107261
> * config/i386/i386-modes.def (VECTOR_MODE): Support V2BFmode.
> * config/i386/i386.cc (classify_argument): Handle V4BFmode and
> V2BFmode.
> (ix86_convert_const_vector_to_integer): Ditto.
> * config/i386/i386.h (VALID_AVX512FP16_REG_MODE): Remove
> V2HFmode.
> (VALID_SSE2_REG_MODE): Add V4BFmode and V2BFmode.
> (VALID_MMX_REG_MODE): Add V4BFmode.
> * config/i386/i386.md (mode): Add V4BF and V2BF.
> (MODE_SIZE): Ditto.
> * config/i386/mmx.md (MMXMODE): Add V4BF.
> (V_32): Add V2BF.
> (V_16_32_64): Add V4BF and V2BF.
> (mmxinsnmode): Add V4BF and V2BF.
> (*mov<mode>_internal): Handle V4BFmode and V2BFmode.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr107261.c: New test.
> ---
> gcc/config/i386/i386-modes.def | 1 +
> gcc/config/i386/i386.cc | 6 ++++
> gcc/config/i386/i386.h | 9 +++---
> gcc/config/i386/i386.md | 5 ++--
> gcc/config/i386/mmx.md | 26 +++++++++-------
> gcc/testsuite/gcc.target/i386/pr107261.c | 38 ++++++++++++++++++++++++
> 6 files changed, 68 insertions(+), 17 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107261.c
>
> diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
> index b49daaef253..dbc3165c5fc 100644
> --- a/gcc/config/i386/i386-modes.def
> +++ b/gcc/config/i386/i386-modes.def
> @@ -93,6 +93,7 @@ VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */
> VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */
> VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */
> VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
> +VECTOR_MODE (FLOAT, BF, 2); /* V2BF */
> VECTOR_MODE (FLOAT, HF, 6); /* V6HF */
> VECTOR_MODE (INT, TI, 1); /* V1TI */
> VECTOR_MODE (INT, DI, 1); /* V1DI */
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index aeea26ef4be..1aca7d55a09 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -2507,7 +2507,9 @@ classify_argument (machine_mode mode, const_tree type,
> case E_V2SImode:
> case E_V4HImode:
> case E_V4HFmode:
> + case E_V4BFmode:
> case E_V2HFmode:
> + case E_V2BFmode:
> case E_V8QImode:
> classes[0] = X86_64_SSE_CLASS;
> return 1;
> @@ -2991,6 +2993,7 @@ pass_in_reg:
> case E_V8QImode:
> case E_V4HImode:
> case E_V4HFmode:
> + case E_V4BFmode:
> case E_V2SImode:
> case E_V2SFmode:
> case E_V1TImode:
> @@ -3240,6 +3243,7 @@ pass_in_reg:
> case E_V8QImode:
> case E_V4HImode:
> case E_V4HFmode:
> + case E_V4BFmode:
> case E_V2SImode:
> case E_V2SFmode:
> case E_V1TImode:
> @@ -15810,7 +15814,9 @@ ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
> }
> break;
> case E_V2HFmode:
> + case E_V2BFmode:
> case E_V4HFmode:
> + case E_V4BFmode:
> case E_V2SFmode:
> for (int i = 0; i < nunits; ++i)
> {
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index fd7c9df47e5..16d9c606077 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1033,13 +1033,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
> || (MODE) == V8BFmode || (MODE) == TImode)
>
> #define VALID_AVX512FP16_REG_MODE(MODE) \
> - ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode \
> - || (MODE) == V2HFmode)
> + ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
>
> #define VALID_SSE2_REG_MODE(MODE) \
> ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
> || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode \
> - || (MODE) == V8BFmode \
> + || (MODE) == V8BFmode || (MODE) == V4BFmode || (MODE) == V2BFmode \
> || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \
> || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode \
> || (MODE) == HFmode || (MODE) == BFmode)
> @@ -1057,7 +1056,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
> ((MODE) == V1DImode || (MODE) == DImode \
> || (MODE) == V2SImode || (MODE) == SImode \
> || (MODE) == V4HImode || (MODE) == V8QImode \
> - || (MODE) == V4HFmode)
> + || (MODE) == V4HFmode || (MODE) == V4BFmode)
>
> #define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
>
> @@ -1074,7 +1073,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
> || (MODE) == CSImode || (MODE) == CDImode \
> || (MODE) == SDmode || (MODE) == DDmode \
> || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode \
> - || (MODE) == V2HImode || (MODE) == V2HFmode \
> + || (MODE) == V2HImode || (MODE) == V2HFmode || (MODE) == V2BFmode \
> || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode \
> || (TARGET_64BIT \
> && ((MODE) == TImode || (MODE) == CTImode \
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index baf1f1f8fa2..e46554efea8 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -500,7 +500,7 @@ (define_attr "type"
> ;; Main data type used by the insn
> (define_attr "mode"
> "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
> - V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
> + V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
> (const_string "unknown"))
>
> ;; The CPU unit operations uses.
> @@ -1119,7 +1119,8 @@ (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
> (V4SF "16") (V8SF "32") (V16SF "64")
> (V8HF "16") (V16HF "32") (V32HF "64")
> (V4HF "8") (V2HF "4")
> - (V8BF "16") (V16BF "32") (V32BF "64")])
> + (V8BF "16") (V16BF "32") (V32BF "64")
> + (V4BF "8") (V2BF "4")])
>
> ;; Double word integer modes as mode attribute.
> (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index c359e2dd6de..d5134cc351e 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -48,7 +48,7 @@ (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
> (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
>
> ;; All 8-byte vector modes handled by MMX
> -(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF])
> +(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF V4BF])
> (define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
>
> ;; Mix-n-match
> @@ -58,7 +58,7 @@ (define_mode_iterator MMXMODE24 [V4HI V2SI])
> (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
>
> ;; All 4-byte integer/float16 vector modes
> -(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF])
> +(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
>
> ;; 4-byte integer vector modes
> (define_mode_iterator VI_32 [V4QI V2HI])
> @@ -72,7 +72,8 @@ (define_mode_iterator VI1_16_32 [V4QI V2QI])
> ;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
> (define_mode_iterator V_16_32_64
> [V2QI V4QI V2HI V2HF
> - (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
> + (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
> + (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
> (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
>
> ;; V2S* modes
> @@ -92,6 +93,7 @@ (define_mode_attr mmxinsnmode
> (V4HI "DI") (V2HI "SI")
> (V2SI "DI")
> (V4HF "DI") (V2HF "SI")
> + (V4BF "DI") (V2BF "SI")
> (V2SF "DI")])
>
> (define_mode_attr mmxdoublemode
> @@ -213,9 +215,9 @@ (define_insn "*mov<mode>_internal"
> (cond [(eq_attr "alternative" "2")
> (const_string "SI")
> (eq_attr "alternative" "11,12")
> - (cond [(match_test "<MODE>mode == V2SFmode")
> - (const_string "V4SF")
> - (match_test "<MODE>mode == V4HFmode")
> + (cond [(match_test "<MODE>mode == V2SFmode
> + || <MODE>mode == V4HFmode
> + || <MODE>mode == V4BFmode")
> (const_string "V4SF")
> (ior (not (match_test "TARGET_SSE2"))
> (match_test "optimize_function_for_size_p (cfun)"))
> @@ -227,13 +229,15 @@ (define_insn "*mov<mode>_internal"
> (ior (ior (and (match_test "<MODE>mode == V2SFmode")
> (not (match_test "TARGET_MMX_WITH_SSE")))
> (not (match_test "TARGET_SSE2")))
> - (match_test "<MODE>mode == V4HFmode")))
> + (match_test "<MODE>mode == V4HFmode
> + || <MODE>mode == V4BFmode")))
> (const_string "V2SF")
>
> (and (eq_attr "alternative" "14")
> (ior (ior (match_test "<MODE>mode == V2SFmode")
> (not (match_test "TARGET_SSE2")))
> - (match_test "<MODE>mode == V4HFmode")))
> + (match_test "<MODE>mode == V4HFmode
> + || <MODE>mode == V4BFmode")))
> (const_string "V2SF")
> ]
> (const_string "DI")))
> @@ -321,7 +325,8 @@ (define_insn "*mov<mode>_internal"
> (const_string "*")))
> (set (attr "mode")
> (cond [(eq_attr "alternative" "2,3")
> - (cond [(match_test "<MODE>mode == V2HFmode")
> + (cond [(match_test "<MODE>mode == V2HFmode
> + || <MODE>mode == V2BFmode")
> (const_string "V4SF")
> (match_test "TARGET_AVX")
> (const_string "TI")
> @@ -332,7 +337,8 @@ (define_insn "*mov<mode>_internal"
> (const_string "TI"))
>
> (and (eq_attr "alternative" "4,5")
> - (ior (match_test "<MODE>mode == V2HFmode")
> + (ior (match_test "<MODE>mode == V2HFmode
> + || <MODE>mode == V2BFmode")
> (not (match_test "TARGET_SSE2"))))
> (const_string "SF")
> ]
> diff --git a/gcc/testsuite/gcc.target/i386/pr107261.c b/gcc/testsuite/gcc.target/i386/pr107261.c
> new file mode 100644
> index 00000000000..eb1d232fbfc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107261.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2" } */
> +
> +typedef __bf16 v4bf __attribute__ ((vector_size (8)));
> +typedef __bf16 v2bf __attribute__ ((vector_size (4)));
> +
> +v4bf
> +v4bf_abi_1 (v4bf a)
> +{
> + return a;
> +}
> +
> +v4bf
> +v4bf_abi_3 (v4bf a, v4bf b, v4bf c)
> +{
> + return c;
> +}
> +
> +/* { dg-final { scan-assembler-times "movq\[\\t \]*%mm2, %mm0" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm2, %xmm0" 1 { target { ! ia32 } } } } */
> +
> +v4bf
> +v4bf_abi_4 (v4bf a, v4bf b, v4bf c, v4bf d)
> +{
> + return d;
> +}
> +
> +/* { dg-final { scan-assembler-times "movq\[\\t \]*4\\(%esp\\), %mm0" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm3, %xmm0" 1 { target { ! ia32 } } } } */
> +
> +v2bf
> +v2bf_test (v2bf a, v2bf b, v2bf c, v2bf d)
> +{
> + return b;
> +}
> +
> +/* { dg-final { scan-assembler-times "movl\[\\t \]*8\\(%esp\\), %eax" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm1, %xmm0" 1 { target { ! ia32 } } } } */
> --
> 2.27.0
>
@@ -93,6 +93,7 @@ VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */
VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */
VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */
VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
+VECTOR_MODE (FLOAT, BF, 2); /* V2BF */
VECTOR_MODE (FLOAT, HF, 6); /* V6HF */
VECTOR_MODE (INT, TI, 1); /* V1TI */
VECTOR_MODE (INT, DI, 1); /* V1DI */
@@ -2507,7 +2507,9 @@ classify_argument (machine_mode mode, const_tree type,
case E_V2SImode:
case E_V4HImode:
case E_V4HFmode:
+ case E_V4BFmode:
case E_V2HFmode:
+ case E_V2BFmode:
case E_V8QImode:
classes[0] = X86_64_SSE_CLASS;
return 1;
@@ -2991,6 +2993,7 @@ pass_in_reg:
case E_V8QImode:
case E_V4HImode:
case E_V4HFmode:
+ case E_V4BFmode:
case E_V2SImode:
case E_V2SFmode:
case E_V1TImode:
@@ -3240,6 +3243,7 @@ pass_in_reg:
case E_V8QImode:
case E_V4HImode:
case E_V4HFmode:
+ case E_V4BFmode:
case E_V2SImode:
case E_V2SFmode:
case E_V1TImode:
@@ -15810,7 +15814,9 @@ ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
}
break;
case E_V2HFmode:
+ case E_V2BFmode:
case E_V4HFmode:
+ case E_V4BFmode:
case E_V2SFmode:
for (int i = 0; i < nunits; ++i)
{
@@ -1033,13 +1033,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == V8BFmode || (MODE) == TImode)
#define VALID_AVX512FP16_REG_MODE(MODE) \
- ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode \
- || (MODE) == V2HFmode)
+ ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
|| (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode \
- || (MODE) == V8BFmode \
+ || (MODE) == V8BFmode || (MODE) == V4BFmode || (MODE) == V2BFmode \
|| (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \
|| (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode \
|| (MODE) == HFmode || (MODE) == BFmode)
@@ -1057,7 +1056,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
((MODE) == V1DImode || (MODE) == DImode \
|| (MODE) == V2SImode || (MODE) == SImode \
|| (MODE) == V4HImode || (MODE) == V8QImode \
- || (MODE) == V4HFmode)
+ || (MODE) == V4HFmode || (MODE) == V4BFmode)
#define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
@@ -1074,7 +1073,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == CSImode || (MODE) == CDImode \
|| (MODE) == SDmode || (MODE) == DDmode \
|| (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode \
- || (MODE) == V2HImode || (MODE) == V2HFmode \
+ || (MODE) == V2HImode || (MODE) == V2HFmode || (MODE) == V2BFmode \
|| (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode \
|| (TARGET_64BIT \
&& ((MODE) == TImode || (MODE) == CTImode \
@@ -500,7 +500,7 @@ (define_attr "type"
;; Main data type used by the insn
(define_attr "mode"
"unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
- V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
+ V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
(const_string "unknown"))
;; The CPU unit operations uses.
@@ -1119,7 +1119,8 @@ (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
(V4SF "16") (V8SF "32") (V16SF "64")
(V8HF "16") (V16HF "32") (V32HF "64")
(V4HF "8") (V2HF "4")
- (V8BF "16") (V16BF "32") (V32BF "64")])
+ (V8BF "16") (V16BF "32") (V32BF "64")
+ (V4BF "8") (V2BF "4")])
;; Double word integer modes as mode attribute.
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
@@ -48,7 +48,7 @@ (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
;; All 8-byte vector modes handled by MMX
-(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF])
+(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF V4BF])
(define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
;; Mix-n-match
@@ -58,7 +58,7 @@ (define_mode_iterator MMXMODE24 [V4HI V2SI])
(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
;; All 4-byte integer/float16 vector modes
-(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF])
+(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
;; 4-byte integer vector modes
(define_mode_iterator VI_32 [V4QI V2HI])
@@ -72,7 +72,8 @@ (define_mode_iterator VI1_16_32 [V4QI V2QI])
;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
(define_mode_iterator V_16_32_64
[V2QI V4QI V2HI V2HF
- (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
+ (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
+ (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
(V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
;; V2S* modes
@@ -92,6 +93,7 @@ (define_mode_attr mmxinsnmode
(V4HI "DI") (V2HI "SI")
(V2SI "DI")
(V4HF "DI") (V2HF "SI")
+ (V4BF "DI") (V2BF "SI")
(V2SF "DI")])
(define_mode_attr mmxdoublemode
@@ -213,9 +215,9 @@ (define_insn "*mov<mode>_internal"
(cond [(eq_attr "alternative" "2")
(const_string "SI")
(eq_attr "alternative" "11,12")
- (cond [(match_test "<MODE>mode == V2SFmode")
- (const_string "V4SF")
- (match_test "<MODE>mode == V4HFmode")
+ (cond [(match_test "<MODE>mode == V2SFmode
+ || <MODE>mode == V4HFmode
+ || <MODE>mode == V4BFmode")
(const_string "V4SF")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
@@ -227,13 +229,15 @@ (define_insn "*mov<mode>_internal"
(ior (ior (and (match_test "<MODE>mode == V2SFmode")
(not (match_test "TARGET_MMX_WITH_SSE")))
(not (match_test "TARGET_SSE2")))
- (match_test "<MODE>mode == V4HFmode")))
+ (match_test "<MODE>mode == V4HFmode
+ || <MODE>mode == V4BFmode")))
(const_string "V2SF")
(and (eq_attr "alternative" "14")
(ior (ior (match_test "<MODE>mode == V2SFmode")
(not (match_test "TARGET_SSE2")))
- (match_test "<MODE>mode == V4HFmode")))
+ (match_test "<MODE>mode == V4HFmode
+ || <MODE>mode == V4BFmode")))
(const_string "V2SF")
]
(const_string "DI")))
@@ -321,7 +325,8 @@ (define_insn "*mov<mode>_internal"
(const_string "*")))
(set (attr "mode")
(cond [(eq_attr "alternative" "2,3")
- (cond [(match_test "<MODE>mode == V2HFmode")
+ (cond [(match_test "<MODE>mode == V2HFmode
+ || <MODE>mode == V2BFmode")
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
@@ -332,7 +337,8 @@ (define_insn "*mov<mode>_internal"
(const_string "TI"))
(and (eq_attr "alternative" "4,5")
- (ior (match_test "<MODE>mode == V2HFmode")
+ (ior (match_test "<MODE>mode == V2HFmode
+ || <MODE>mode == V2BFmode")
(not (match_test "TARGET_SSE2"))))
(const_string "SF")
]
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef __bf16 v4bf __attribute__ ((vector_size (8)));
+typedef __bf16 v2bf __attribute__ ((vector_size (4)));
+
+v4bf
+v4bf_abi_1 (v4bf a)
+{
+ return a;
+}
+
+v4bf
+v4bf_abi_3 (v4bf a, v4bf b, v4bf c)
+{
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "movq\[\\t \]*%mm2, %mm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm2, %xmm0" 1 { target { ! ia32 } } } } */
+
+v4bf
+v4bf_abi_4 (v4bf a, v4bf b, v4bf c, v4bf d)
+{
+ return d;
+}
+
+/* { dg-final { scan-assembler-times "movq\[\\t \]*4\\(%esp\\), %mm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm3, %xmm0" 1 { target { ! ia32 } } } } */
+
+v2bf
+v2bf_test (v2bf a, v2bf b, v2bf c, v2bf d)
+{
+ return b;
+}
+
+/* { dg-final { scan-assembler-times "movl\[\\t \]*8\\(%esp\\), %eax" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm1, %xmm0" 1 { target { ! ia32 } } } } */