Hi Maciej:
It's not intentional; I suspect it happened because I ported this from our old
internal GCC branch. I will send a patch to fix that later. Thanks for catching this!
On Mon, Dec 5, 2022 at 21:05, Maciej W. Rozycki <macro@embecosm.com> wrote:
> Hi Kito,
>
> I came across this issue while inspecting code and I have been wondering
> what the reason was to downgrade the current FMV.X.W and FMV.W.X instructions
> to their older FMV.X.S and FMV.S.X variants here:
>
> On Wed, 10 Aug 2022, Kito Cheng wrote:
>
> > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> > index 5a0adffb5ce..47e6110767c 100644
> > --- a/gcc/config/riscv/riscv.cc
> > +++ b/gcc/config/riscv/riscv.cc
> > @@ -2308,10 +2310,19 @@ riscv_output_move (rtx dest, rtx src)
> > if (dest_code == REG && GP_REG_P (REGNO (dest)))
> > {
> > if (src_code == REG && FP_REG_P (REGNO (src)))
> > - return dbl_p ? "fmv.x.d\t%0,%1" : "fmv.x.w\t%0,%1";
> > + switch (width)
> > + {
> > + case 2:
> > + /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
> > + return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
> > + case 4:
> > + return "fmv.x.s\t%0,%1";
> > + case 8:
> > + return "fmv.x.d\t%0,%1";
> > + }
>
> and here:
>
> > @@ -2353,18 +2364,24 @@ riscv_output_move (rtx dest, rtx src)
> > return "mv\t%0,%z1";
> >
> > if (FP_REG_P (REGNO (dest)))
> > - {
> > - if (!dbl_p)
> > - return "fmv.w.x\t%0,%z1";
> > - if (TARGET_64BIT)
> > - return "fmv.d.x\t%0,%z1";
> > - /* in RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w */
> > - gcc_assert (src == CONST0_RTX (mode));
> > - return "fcvt.d.w\t%0,x0";
> > - }
> > + switch (width)
> > + {
> > + case 2:
> > + /* High 16 bits should be all-1, otherwise HW will treated
> > + as a n-bit canonical NaN, but isn't matter for softfloat. */
> > + return "fmv.s.x\t%0,%1";
> > + case 4:
> > + return "fmv.s.x\t%0,%z1";
> > + case 8:
> > + if (TARGET_64BIT)
> > + return "fmv.d.x\t%0,%z1";
> > + /* in RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w */
>
> (Incorrect comment formatting here as well.)
>
> > + gcc_assert (src == CONST0_RTX (mode));
> > + return "fcvt.d.w\t%0,x0";
> > + }
>
> Was it intentional or just an oversight in review? If intentional, I'd
> expect such a change to happen on its own rather than sneaked in with a
> large functional update.
>
> Maciej
>
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
#include "recog.h"
#include "diagnostic-core.h"
#include "stor-layout.h"
+#include "stringpool.h"
#include "expr.h"
#include "langhooks.h"
@@ -160,6 +161,8 @@ static GTY(()) int riscv_builtin_decl_index[NUM_INSN_CODES];
#define GET_BUILTIN_DECL(CODE) \
riscv_builtin_decls[riscv_builtin_decl_index[(CODE)]]
+tree riscv_float16_type_node = NULL_TREE;
+
/* Return the function type associated with function prototype TYPE. */
static tree
@@ -185,11 +188,32 @@ riscv_build_function_type (enum riscv_function_type type)
return types[(int) type];
}
+static void
+riscv_init_builtin_types (void)
+{
+ /* Provide the _Float16 type and float16_type_node if needed. */
+ if (!float16_type_node)
+ {
+ riscv_float16_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (riscv_float16_type_node) = 16;
+ SET_TYPE_MODE (riscv_float16_type_node, HFmode);
+ layout_type (riscv_float16_type_node);
+ }
+ else
+ riscv_float16_type_node = float16_type_node;
+
+ if (!maybe_get_identifier ("_Float16"))
+ lang_hooks.types.register_builtin_type (riscv_float16_type_node,
+ "_Float16");
+}
+
/* Implement TARGET_INIT_BUILTINS. */
void
riscv_init_builtins (void)
{
+ riscv_init_builtin_types ();
+
for (size_t i = 0; i < ARRAY_SIZE (riscv_builtins); i++)
{
const struct riscv_builtin_description *d = &riscv_builtins[i];
@@ -19,4 +19,5 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+FLOAT_MODE (HF, 2, ieee_half_format);
FLOAT_MODE (TF, 16, ieee_quad_format);
@@ -2296,11 +2296,13 @@ riscv_output_move (rtx dest, rtx src)
enum rtx_code dest_code, src_code;
machine_mode mode;
bool dbl_p;
+ unsigned width;
dest_code = GET_CODE (dest);
src_code = GET_CODE (src);
mode = GET_MODE (dest);
dbl_p = (GET_MODE_SIZE (mode) == 8);
+ width = GET_MODE_SIZE (mode);
if (dbl_p && riscv_split_64bit_move_p (dest, src))
return "#";
@@ -2308,10 +2310,19 @@ riscv_output_move (rtx dest, rtx src)
if (dest_code == REG && GP_REG_P (REGNO (dest)))
{
if (src_code == REG && FP_REG_P (REGNO (src)))
- return dbl_p ? "fmv.x.d\t%0,%1" : "fmv.x.w\t%0,%1";
+ switch (width)
+ {
+ case 2:
+ /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
+ return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
+ case 4:
+ return "fmv.x.s\t%0,%1";
+ case 8:
+ return "fmv.x.d\t%0,%1";
+ }
if (src_code == MEM)
- switch (GET_MODE_SIZE (mode))
+ switch (width)
{
case 1: return "lbu\t%0,%1";
case 2: return "lhu\t%0,%1";
@@ -2353,18 +2364,24 @@ riscv_output_move (rtx dest, rtx src)
return "mv\t%0,%z1";
if (FP_REG_P (REGNO (dest)))
- {
- if (!dbl_p)
- return "fmv.w.x\t%0,%z1";
- if (TARGET_64BIT)
- return "fmv.d.x\t%0,%z1";
- /* in RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w */
- gcc_assert (src == CONST0_RTX (mode));
- return "fcvt.d.w\t%0,x0";
- }
+ switch (width)
+ {
+ case 2:
+ /* High 16 bits should be all-1, otherwise HW will treated
+ as a n-bit canonical NaN, but isn't matter for softfloat. */
+ return "fmv.s.x\t%0,%1";
+ case 4:
+ return "fmv.s.x\t%0,%z1";
+ case 8:
+ if (TARGET_64BIT)
+ return "fmv.d.x\t%0,%z1";
+ /* in RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w */
+ gcc_assert (src == CONST0_RTX (mode));
+ return "fcvt.d.w\t%0,x0";
+ }
}
if (dest_code == MEM)
- switch (GET_MODE_SIZE (mode))
+ switch (width)
{
case 1: return "sb\t%z1,%0";
case 2: return "sh\t%z1,%0";
@@ -2375,7 +2392,15 @@ riscv_output_move (rtx dest, rtx src)
if (src_code == REG && FP_REG_P (REGNO (src)))
{
if (dest_code == REG && FP_REG_P (REGNO (dest)))
- return dbl_p ? "fmv.d\t%0,%1" : "fmv.s\t%0,%1";
+ switch (width)
+ {
+ case 2:
+ return "fmv.s\t%0,%1";
+ case 4:
+ return "fmv.s\t%0,%1";
+ case 8:
+ return "fmv.d\t%0,%1";
+ }
if (dest_code == MEM)
return dbl_p ? "fsd\t%1,%0" : "fsw\t%1,%0";
@@ -5638,6 +5663,107 @@ riscv_asan_shadow_offset (void)
return TARGET_64BIT ? (HOST_WIDE_INT_1 << 29) : 0;
}
+/* Implement TARGET_MANGLE_TYPE. */
+
+static const char *
+riscv_mangle_type (const_tree type)
+{
+ /* Half-precision float, _Float16 is "DF16_". */
+ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+ return "DF16_";
+
+ /* Use the default mangling. */
+ return NULL;
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
+
+static bool
+riscv_scalar_mode_supported_p (scalar_mode mode)
+{
+ if (mode == HFmode)
+ return true;
+ else
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
+ if MODE is HFmode, and punt to the generic implementation otherwise. */
+
+static bool
+riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
+{
+ if (mode == HFmode)
+ return true;
+ else
+ return default_libgcc_floating_mode_supported_p (mode);
+}
+
+/* Set the value of FLT_EVAL_METHOD.
+ ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
+
+ 0: evaluate all operations and constants, whose semantic type has at
+ most the range and precision of type float, to the range and
+ precision of float; evaluate all other operations and constants to
+ the range and precision of the semantic type;
+
+ N, where _FloatN is a supported interchange floating type
+ evaluate all operations and constants, whose semantic type has at
+ most the range and precision of _FloatN type, to the range and
+ precision of the _FloatN type; evaluate all other operations and
+ constants to the range and precision of the semantic type;
+
+ If we have the zfh extensions then we support _Float16 in native
+ precision, so we should set this to 16. */
+static enum flt_eval_method
+riscv_excess_precision (enum excess_precision_type type)
+{
+ switch (type)
+ {
+ case EXCESS_PRECISION_TYPE_FAST:
+ case EXCESS_PRECISION_TYPE_STANDARD:
+ return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
+ case EXCESS_PRECISION_TYPE_IMPLICIT:
+ return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
+ default:
+ gcc_unreachable ();
+ }
+ return FLT_EVAL_METHOD_UNPREDICTABLE;
+}
+
+/* Implement TARGET_FLOATN_MODE. */
+static opt_scalar_float_mode
+riscv_floatn_mode (int n, bool extended)
+{
+ if (!extended && n == 16)
+ return HFmode;
+
+ return default_floatn_mode (n, extended);
+}
+
+static void
+riscv_init_libfuncs (void)
+{
+ /* Half-precision float operations. The compiler handles all operations
+ with NULL libfuncs by converting to SFmode. */
+
+ /* Arithmetic. */
+ set_optab_libfunc (add_optab, HFmode, NULL);
+ set_optab_libfunc (sdiv_optab, HFmode, NULL);
+ set_optab_libfunc (smul_optab, HFmode, NULL);
+ set_optab_libfunc (neg_optab, HFmode, NULL);
+ set_optab_libfunc (sub_optab, HFmode, NULL);
+
+ /* Comparisons. */
+ set_optab_libfunc (eq_optab, HFmode, NULL);
+ set_optab_libfunc (ne_optab, HFmode, NULL);
+ set_optab_libfunc (lt_optab, HFmode, NULL);
+ set_optab_libfunc (le_optab, HFmode, NULL);
+ set_optab_libfunc (ge_optab, HFmode, NULL);
+ set_optab_libfunc (gt_optab, HFmode, NULL);
+ set_optab_libfunc (unord_optab, HFmode, NULL);
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -5821,6 +5947,25 @@ riscv_asan_shadow_offset (void)
#undef TARGET_NEW_ADDRESS_PROFITABLE_P
#define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE riscv_mangle_type
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p
+
+#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
+#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
+ riscv_libgcc_floating_mode_supported_p
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS riscv_init_libfuncs
+
+#undef TARGET_C_EXCESS_PRECISION
+#define TARGET_C_EXCESS_PRECISION riscv_excess_precision
+
+#undef TARGET_FLOATN_MODE
+#define TARGET_FLOATN_MODE riscv_floatn_mode
+
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset
@@ -134,7 +134,7 @@ (define_attr "move_type"
(const_string "unknown"))
;; Main data type used by the insn
-(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF"
+(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF"
(const_string "unknown"))
;; True if the main data type is twice the size of a word.
@@ -324,20 +324,20 @@ (define_mode_attr load [(QI "lb") (HI "lh") (SI "lw") (DI "ld") (SF "flw") (DF "
(define_mode_attr default_load [(QI "lbu") (HI "lhu") (SI "lw") (DI "ld")])
;; Mode attribute for FP loads into integer registers.
-(define_mode_attr softload [(SF "lw") (DF "ld")])
+(define_mode_attr softload [(HF "lh") (SF "lw") (DF "ld")])
;; Instruction names for stores.
(define_mode_attr store [(QI "sb") (HI "sh") (SI "sw") (DI "sd") (SF "fsw") (DF "fsd")])
;; Instruction names for FP stores from integer registers.
-(define_mode_attr softstore [(SF "sw") (DF "sd")])
+(define_mode_attr softstore [(HF "sh") (SF "sw") (DF "sd")])
;; This attribute gives the best constraint to use for registers of
;; a given mode.
(define_mode_attr reg [(SI "d") (DI "d") (CC "d")])
;; This attribute gives the format suffix for floating-point operations.
-(define_mode_attr fmt [(SF "s") (DF "d")])
+(define_mode_attr fmt [(HF "h") (SF "s") (DF "d")])
;; This attribute gives the integer suffix for floating-point conversions.
(define_mode_attr ifmt [(SI "w") (DI "l")])
@@ -347,7 +347,7 @@ (define_mode_attr amo [(SI "w") (DI "d")])
;; This attribute gives the upper-case mode name for one unit of a
;; floating-point mode.
-(define_mode_attr UNITMODE [(SF "SF") (DF "DF")])
+(define_mode_attr UNITMODE [(HF "HF") (SF "SF") (DF "DF")])
;; This attribute gives the integer mode that has half the size of
;; the controlling mode.
@@ -1450,6 +1450,26 @@ (define_insn "extendsfdf2"
[(set_attr "type" "fcvt")
(set_attr "mode" "DF")])
+;; 16-bit floating point moves
+(define_expand "movhf"
+ [(set (match_operand:HF 0 "")
+ (match_operand:HF 1 ""))]
+ ""
+{
+ if (riscv_legitimize_move (HFmode, operands[0], operands[1]))
+ DONE;
+})
+
+
+(define_insn "*movhf_softfloat"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=f, r,r,m,*f,*r")
+ (match_operand:HF 1 "move_operand" " f,Gr,m,r,*r,*f"))]
+ "(register_operand (operands[0], HFmode)
+ || reg_or_0_operand (operands[1], HFmode))"
+ { return riscv_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "fmove,move,load,store,mtc,mfc")
+ (set_attr "mode" "HF")])
+
;;
;; ....................
;;
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+_Float16 x;
+
+_Float16 foo1 (_Float16 a, _Float16 b)
+{
+ return a + b;
+}
+
+_Float16 foo2 (_Float16 a, _Float16 b)
+{
+ return a * b;
+}
+
+int foo3 (_Float16 a, _Float16 b)
+{
+ return a > b;
+}
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64if -mabi=lp64f -O" } */
+
+_Float16 test_soft_move (_Float16 a, _Float16 b)
+{
+ return b;
+}
+
+/* { dg-final { scan-assembler-not "fmv.h" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64if -mabi=lp64f -O" } */
+
+_Float16 test_soft_add (_Float16 a, _Float16 b)
+{
+ /* Make sure __addhf3 not invoked here. */
+ /* { dg-final { scan-assembler-times "call\t__extendhfsf2" 2 } } */
+ return a + b;
+ /* { dg-final { scan-assembler-not "call\t__addhf3" } } */
+ /* { dg-final { scan-assembler-times "fadd.s" 1 } } */
+ /* { dg-final { scan-assembler-times "call\t__truncsfhf2" 1 } } */
+}
+
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64if -mabi=lp64f -O" } */
+
+int test_soft_compare (_Float16 a, _Float16 b)
+{
+ /* Make sure __gthf2 not invoked here. */
+ /* { dg-final { scan-assembler-times "call\t__extendhfsf2" 2 } } */
+ return a > b;
+ /* { dg-final { scan-assembler-not "call\t__gthf2" } } */
+ /* { dg-final { scan-assembler-times "fgt.s" 1 } } */
+}
+
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64i -mabi=lp64 -O" } */
+
+int test_soft_compare (_Float16 a, _Float16 b)
+{
+ /* Make sure __gthf2 not invoked here. */
+ /* { dg-final { scan-assembler-times "call\t__extendhfsf2" 2 } } */
+ return a > b;
+ /* { dg-final { scan-assembler-not "call\t__gthf2" } } */
+ /* { dg-final { scan-assembler-times "call\t__gtsf2" 1 } } */
+}
+
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+_Float16 x;
+
+_Float16 foo1 (_Float16 a, _Float16 b)
+{
+ return a + b;
+}
+
+_Float16 foo2 (_Float16 a, _Float16 b)
+{
+ return a * b;
+}
+
+int foo3 (_Float16 a, _Float16 b)
+{
+ return a > b;
+}
@@ -41,6 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+#define _FP_NANFRAC_H _FP_QNANBIT_H
#define _FP_NANFRAC_S _FP_QNANBIT_S
#define _FP_NANFRAC_D _FP_QNANBIT_D, 0
#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0
@@ -63,6 +64,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y)
+#define _FP_NANFRAC_H _FP_QNANBIT_H
#define _FP_NANFRAC_S _FP_QNANBIT_S
#define _FP_NANFRAC_D _FP_QNANBIT_D
#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0
@@ -80,6 +82,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
#define CMPtype __gcc_CMPtype
+#define _FP_NANSIGN_H 0
#define _FP_NANSIGN_S 0
#define _FP_NANSIGN_D 0
#define _FP_NANSIGN_Q 0
@@ -41,3 +41,8 @@ else
softfp_extras := divsf3 divdf3 divtf3
endif
+
+softfp_extensions += hfsf hfdf hftf
+softfp_truncations += tfhf dfhf sfhf
+softfp_extras += fixhfsi fixhfdi fixunshfsi fixunshfdi \
+ floatsihf floatdihf floatunsihf floatundihf
@@ -1,3 +1,4 @@
include $(srcdir)/config/riscv/t-softfp32
softfp_int_modes += ti
+softfp_extras += fixhfti fixunshfti floattihf floatuntihf