Hi!
x86_64/i686 has for a few weeks working std::bfloat16_t support, __bf16
there is no longer a storage only type, but can be used for arithmetics
and is supported in libgcc and libstdc++.
The following patch adds similar support for AArch64.
Bootstrapped/regtested on aarch64-linux.
Regressions are:
+FAIL: 26_numerics/headers/cmath/functions_std_c++23.cc (test for excess errors)
this one is something I need to look at:
functions_std_c++23.cc:(.text._Z14test_functionsIDFb16_EvPT_PiPlPx[_Z14test_functionsIDFb16_EvPT_PiPlPx]+0x738): undefined reference to `__floatdibf'
(4 times). I need to compare to x86, I believe we want to do a DI -> SF
conversion followed by SF -> BF, but it is unclear why that isn't happening.
+FAIL: gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c -march=armv8.2-a+sve -moverride=tune=none (test for errors, line 21)
svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
This test tests for something that no longer fails, so could be just
adjusted.
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++11 scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++11 scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++11 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++14 scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++14 scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++14 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++17 scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++17 scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++17 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++20 scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++20 scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++20 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++2b scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++2b scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++2b scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++98 scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++98 scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++98 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
These test the mangling, which changed from u6__bf16 to the standard DF16b.
Now, while on x86 we change the mangling and behavior of __bf16, it doesn't
need to be necessarily like that on aarch64 (although it would be nice for
consistency), for C++ portable code would just use std::bfloat16_t type
which is in libstdc++ defined to decltype(0.0bf16).
So, if you want to keep previous mangling of __bf16 type or keep it storage
only type, we can always register some other name (__bfloat16_t or whatever),
make __bf16 and __bfloat16_t be distinct types (former aarch64_bf16_type_node
in the compiler, the latter bfloat16_type_node) and thus have
0.0bf16 have the latter type and libstdc++ using it.
2022-11-13 Jakub Jelinek <jakub@redhat.com>
gcc/
* config/aarch64/aarch64.h (aarch64_bf16_type_node): Remove.
(aarch64_bf16_ptr_type_node): Adjust comment.
* config/aarch64/aarch64.cc (aarch64_gimplify_va_arg_expr): Use
bfloat16_type_node rather than aarch64_bf16_type_node.
(aarch64_mangle_type): Mangle BFmode as DF16b.
(aarch64_libgcc_floating_mode_supported_p,
aarch64_scalar_mode_supported_p): Also support BFmode.
(aarch64_invalid_conversion, aarch64_invalid_unary_op): Remove.
aarch64_invalid_binary_op): Remove BFmode related rejections.
(TARGET_INVALID_CONVERSION, TARGET_INVALID_UNARY_OP): Don't redefine.
* config/aarch64/aarch64-builtins.cc (aarch64_bf16_type_node): Remove.
(aarch64_int_or_fp_type): Use bfloat16_type_node rather than
aarch64_bf16_type_node.
(aarch64_init_simd_builtin_types): Likewise.
(aarch64_init_bf16_types): Likewise. Don't create bfloat16_type_node,
which is created in tree.cc already.
* config/aarch64/aarch64-sve-builtins.def (svbfloat16_t): Likewise.
libgcc/
* config/aarch64/t-softfp (softfp_extensions): Add bfsf.
(softfp_truncations): Add tfbf dfbf sfbf hfbf.
* config/aarch64/libgcc-softfp.ver (GCC_13.0.0): Export
__extendbfsf2 and __trunc{s,d,t,h}fbf2.
* config/aarch64/sfp-machine.h (_FP_NANFRAC_B, _FP_NANSIGN_B): Define.
Jakub
@@ -1220,9 +1220,8 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv);
extern GTY(()) tree aarch64_fp16_type_node;
extern GTY(()) tree aarch64_fp16_ptr_type_node;
-/* This type is the user-visible __bf16, and a pointer to that type. Defined
- in aarch64-builtins.cc. */
-extern GTY(()) tree aarch64_bf16_type_node;
+/* Pointer to the user-visible __bf16 type. __bf16 itself is generic
+ bfloat16_type_node. Defined in aarch64-builtins.cc. */
extern GTY(()) tree aarch64_bf16_ptr_type_node;
/* The generic unwind code in libgcc does not initialize the frame pointer.
@@ -918,7 +918,6 @@ tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;
/* Back-end node type for brain float (bfloat) types. */
-tree aarch64_bf16_type_node = NULL_TREE;
tree aarch64_bf16_ptr_type_node = NULL_TREE;
/* Wrapper around add_builtin_function. NAME is the name of the built-in
@@ -1010,7 +1009,7 @@ aarch64_int_or_fp_type (machine_mode mode,
case E_DFmode:
return double_type_node;
case E_BFmode:
- return aarch64_bf16_type_node;
+ return bfloat16_type_node;
default:
gcc_unreachable ();
}
@@ -1124,8 +1123,8 @@ aarch64_init_simd_builtin_types (void)
aarch64_simd_types[Float64x2_t].eltype = double_type_node;
/* Init Bfloat vector types with underlying __bf16 type. */
- aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
- aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+ aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
+ aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
for (i = 0; i < nelts; i++)
{
@@ -1197,7 +1196,7 @@ aarch64_init_simd_builtin_scalar_types (void)
"__builtin_aarch64_simd_poly128");
(*lang_hooks.types.register_builtin_type) (intTI_type_node,
"__builtin_aarch64_simd_ti");
- (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node,
+ (*lang_hooks.types.register_builtin_type) (bfloat16_type_node,
"__builtin_aarch64_simd_bf");
/* Unsigned integer types for various mode sizes. */
(*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
@@ -1682,13 +1681,8 @@ aarch64_init_fp16_types (void)
static void
aarch64_init_bf16_types (void)
{
- aarch64_bf16_type_node = make_node (REAL_TYPE);
- TYPE_PRECISION (aarch64_bf16_type_node) = 16;
- SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
- layout_type (aarch64_bf16_type_node);
-
- lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
- aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
+ lang_hooks.types.register_builtin_type (bfloat16_type_node, "__bf16");
+ aarch64_bf16_ptr_type_node = build_pointer_type (bfloat16_type_node);
}
/* Pointer authentication builtins that will become NOP on legacy platform.
@@ -19823,7 +19823,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
field_ptr_t = aarch64_fp16_ptr_type_node;
break;
case E_BFmode:
- field_t = aarch64_bf16_type_node;
+ field_t = bfloat16_type_node;
field_ptr_t = aarch64_bf16_ptr_type_node;
break;
case E_V2SImode:
@@ -20730,7 +20730,7 @@ aarch64_mangle_type (const_tree type)
if (TYPE_MAIN_VARIANT (type) == float16_type_node)
return NULL;
if (TYPE_MODE (type) == BFmode)
- return "u6__bf16";
+ return "DF16b";
else
return "Dh";
}
@@ -26428,18 +26428,18 @@ aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
}
/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
- if MODE is HFmode, and punt to the generic implementation otherwise. */
+ if MODE is [BH]Fmode, and punt to the generic implementation otherwise. */
static bool
aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
{
- return (mode == HFmode
+ return ((mode == HFmode || mode == BFmode)
? true
: default_libgcc_floating_mode_supported_p (mode));
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
- if MODE is HFmode, and punt to the generic implementation otherwise. */
+ if MODE is [BH]Fmode, and punt to the generic implementation otherwise. */
static bool
aarch64_scalar_mode_supported_p (scalar_mode mode)
@@ -26447,7 +26447,7 @@ aarch64_scalar_mode_supported_p (scalar_mode mode)
if (DECIMAL_FLOAT_MODE_P (mode))
return default_decimal_float_supported_p ();
- return (mode == HFmode
+ return ((mode == HFmode || mode == BFmode)
? true
: default_scalar_mode_supported_p (mode));
}
@@ -26905,39 +26905,6 @@ aarch64_stack_protect_guard (void)
return NULL_TREE;
}
-/* Return the diagnostic message string if conversion from FROMTYPE to
- TOTYPE is not allowed, NULL otherwise. */
-
-static const char *
-aarch64_invalid_conversion (const_tree fromtype, const_tree totype)
-{
- if (element_mode (fromtype) != element_mode (totype))
- {
- /* Do no allow conversions to/from BFmode scalar types. */
- if (TYPE_MODE (fromtype) == BFmode)
- return N_("invalid conversion from type %<bfloat16_t%>");
- if (TYPE_MODE (totype) == BFmode)
- return N_("invalid conversion to type %<bfloat16_t%>");
- }
-
- /* Conversion allowed. */
- return NULL;
-}
-
-/* Return the diagnostic message string if the unary operation OP is
- not permitted on TYPE, NULL otherwise. */
-
-static const char *
-aarch64_invalid_unary_op (int op, const_tree type)
-{
- /* Reject all single-operand operations on BFmode except for &. */
- if (element_mode (type) == BFmode && op != ADDR_EXPR)
- return N_("operation not permitted on type %<bfloat16_t%>");
-
- /* Operation allowed. */
- return NULL;
-}
-
/* Return the diagnostic message string if the binary operation OP is
not permitted on TYPE1 and TYPE2, NULL otherwise. */
@@ -26945,11 +26912,6 @@ static const char *
aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
const_tree type2)
{
- /* Reject all 2-operand operations on BFmode. */
- if (element_mode (type1) == BFmode
- || element_mode (type2) == BFmode)
- return N_("operation not permitted on type %<bfloat16_t%>");
-
if (VECTOR_TYPE_P (type1)
&& VECTOR_TYPE_P (type2)
&& !TYPE_INDIVISIBLE_P (type1)
@@ -27546,12 +27508,6 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type
-#undef TARGET_INVALID_CONVERSION
-#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion
-
-#undef TARGET_INVALID_UNARY_OP
-#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op
-
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op
@@ -61,7 +61,7 @@ DEF_SVE_MODE (u64offset, none, svuint64_t, bytes)
DEF_SVE_MODE (vnum, none, none, vectors)
DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node)
-DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, aarch64_bf16_type_node)
+DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, bfloat16_type_node)
DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node)
DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node)
DEF_SVE_TYPE (svfloat64_t, 13, __SVFloat64_t, double_type_node)
@@ -1,7 +1,7 @@
softfp_float_modes := tf
softfp_int_modes := si di ti
-softfp_extensions := sftf dftf hftf
-softfp_truncations := tfsf tfdf tfhf
+softfp_extensions := sftf dftf hftf bfsf
+softfp_truncations := tfsf tfdf tfhf tfbf dfbf sfbf hfbf
softfp_exclude_libgcc2 := n
softfp_extras := fixhfti fixunshfti floattihf floatuntihf
@@ -26,3 +26,12 @@ GCC_11.0 {
__mulhc3
__trunctfhf2
}
+
+%inherit GCC_13.0.0 GCC_11.0.0
+GCC_13.0.0 {
+ __extendbfsf2
+ __truncdfbf2
+ __truncsfbf2
+ __trunctfbf2
+ __trunchfbf2
+}
@@ -43,10 +43,12 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y)
#define _FP_NANFRAC_H ((_FP_QNANBIT_H << 1) - 1)
+#define _FP_NANFRAC_B ((_FP_QNANBIT_B << 1) - 1)
#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1)
#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1
#define _FP_NANSIGN_H 0
+#define _FP_NANSIGN_B 0
#define _FP_NANSIGN_S 0
#define _FP_NANSIGN_D 0
#define _FP_NANSIGN_Q 0