c-family: implement -ffp-contract=on

Message ID 20230518210331.11564-1-amonakov@ispras.ru
State Accepted
Headers
Series c-family: implement -ffp-contract=on |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Alexander Monakov May 18, 2023, 9:03 p.m. UTC
  Implement -ffp-contract=on for C and C++ without changing default
behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).

gcc/c-family/ChangeLog:

	* c-gimplify.cc (fma_supported_p): New helper.
	(c_gimplify_expr) [PLUS_EXPR, MINUS_EXPR]: Implement FMA
	contraction.

gcc/ChangeLog:

	* common.opt (fp_contract_mode) [on]: Remove fallback.
	* config/sh/sh.md (*fmasf4): Correct flag_fp_contract_mode test.
	* doc/invoke.texi (-ffp-contract): Update.
	* trans-mem.cc (diagnose_tm_1): Skip internal function calls.
---
 gcc/c-family/c-gimplify.cc | 78 ++++++++++++++++++++++++++++++++++++++
 gcc/common.opt             |  3 +-
 gcc/config/sh/sh.md        |  2 +-
 gcc/doc/invoke.texi        |  8 ++--
 gcc/trans-mem.cc           |  3 ++
 5 files changed, 88 insertions(+), 6 deletions(-)
  

Comments

Richard Biener May 22, 2023, 10:02 a.m. UTC | #1
On Thu, May 18, 2023 at 11:04 PM Alexander Monakov via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Implement -ffp-contract=on for C and C++ without changing default
> behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).

The documentation changes mention that the defaults are changed for
standard modes; I suppose you want to remove that hunk.

> gcc/c-family/ChangeLog:
>
>         * c-gimplify.cc (fma_supported_p): New helper.
>         (c_gimplify_expr) [PLUS_EXPR, MINUS_EXPR]: Implement FMA
>         contraction.
>
> gcc/ChangeLog:
>
>         * common.opt (fp_contract_mode) [on]: Remove fallback.
>         * config/sh/sh.md (*fmasf4): Correct flag_fp_contract_mode test.
>         * doc/invoke.texi (-ffp-contract): Update.
>         * trans-mem.cc (diagnose_tm_1): Skip internal function calls.
> ---
>  gcc/c-family/c-gimplify.cc | 78 ++++++++++++++++++++++++++++++++++++++
>  gcc/common.opt             |  3 +-
>  gcc/config/sh/sh.md        |  2 +-
>  gcc/doc/invoke.texi        |  8 ++--
>  gcc/trans-mem.cc           |  3 ++
>  5 files changed, 88 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
> index ef5c7d919f..f7635d3b0c 100644
> --- a/gcc/c-family/c-gimplify.cc
> +++ b/gcc/c-family/c-gimplify.cc
> @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "c-ubsan.h"
>  #include "tree-nested.h"
>  #include "context.h"
> +#include "tree-pass.h"
> +#include "internal-fn.h"
>
>  /*  The gimplification pass converts the language-dependent trees
>      (ld-trees) emitted by the parser into language-independent trees
> @@ -686,6 +688,14 @@ c_build_bind_expr (location_t loc, tree block, tree body)
>    return bind;
>  }
>
> +/* Helper for c_gimplify_expr: test if target supports fma-like FN.  */
> +
> +static bool
> +fma_supported_p (enum internal_fn fn, tree type)
> +{
> +  return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
> +}
> +
>  /* Gimplification of expression trees.  */
>
>  /* Do C-specific gimplification on *EXPR_P.  PRE_P and POST_P are as in
> @@ -739,6 +749,74 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
>         break;
>        }
>
> +    case PLUS_EXPR:
> +    case MINUS_EXPR:
> +      {
> +       tree type = TREE_TYPE (*expr_p);
> +       /* For -ffp-contract=on we need to attempt FMA contraction only
> +          during initial gimplification.  Late contraction across statement
> +          boundaries would violate language semantics.  */
> +       if (SCALAR_FLOAT_TYPE_P (type)
> +           && flag_fp_contract_mode == FP_CONTRACT_ON
> +           && cfun && !(cfun->curr_properties & PROP_gimple_any)
> +           && fma_supported_p (IFN_FMA, type))
> +         {
> +           bool neg_mul = false, neg_add = code == MINUS_EXPR;
> +
> +           tree *op0_p = &TREE_OPERAND (*expr_p, 0);
> +           tree *op1_p = &TREE_OPERAND (*expr_p, 1);
> +
> +           /* Look for ±(x * y) ± z, swapping operands if necessary.  */
> +           if (TREE_CODE (*op0_p) == NEGATE_EXPR
> +               && TREE_CODE (TREE_OPERAND (*op0_p, 0)) == MULT_EXPR)
> +             /* '*EXPR_P' is '-(x * y) ± z'.  This is fine.  */;
> +           else if (TREE_CODE (*op0_p) != MULT_EXPR)
> +             {
> +               std::swap (op0_p, op1_p);
> +               std::swap (neg_mul, neg_add);
> +             }
> +           if (TREE_CODE (*op0_p) == NEGATE_EXPR)
> +             {
> +               op0_p = &TREE_OPERAND (*op0_p, 0);
> +               neg_mul = !neg_mul;
> +             }
> +           if (TREE_CODE (*op0_p) != MULT_EXPR)
> +             break;
> +           auto_vec<tree, 3> ops (3);
> +           ops.quick_push (TREE_OPERAND (*op0_p, 0));
> +           ops.quick_push (TREE_OPERAND (*op0_p, 1));
> +           ops.quick_push (*op1_p);
> +
> +           enum internal_fn ifn = IFN_FMA;
> +           if (neg_mul)
> +             {
> +               if (fma_supported_p (IFN_FNMA, type))
> +                 ifn = IFN_FNMA;
> +               else
> +                 ops[0] = build1 (NEGATE_EXPR, type, ops[0]);
> +             }
> +           if (neg_add)
> +             {
> +               enum internal_fn ifn2 = ifn == IFN_FMA ? IFN_FMS : IFN_FNMS;
> +               if (fma_supported_p (ifn2, type))
> +                 ifn = ifn2;
> +               else
> +                 ops[2] = build1 (NEGATE_EXPR, type, ops[2]);
> +             }
> +           for (auto &&op : ops)
> +             if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
> +                 == GS_ERROR)
> +               return GS_ERROR;
> +
> +           gcall *call = gimple_build_call_internal_vec (ifn, ops);
> +           gimple_seq_add_stmt_without_update (pre_p, call);
> +           *expr_p = create_tmp_var (type);
> +           gimple_call_set_lhs (call, *expr_p);

it would be possible to do

  *expr_p = build_call_expr_internal (ifn, type, ops[0], ops[1], ops[2]);
  return GS_OK;

and not worry about temporary creation and gimplifying of the operands.
That would in theory also leave the possibility to do this during
genericization instead (and avoid the guard against late invocation of
the hook).

Otherwise it looks OK, but I'll let frontend maintainers have a chance to look
as well.

Thanks for tackling this long-standing issue.
Richard.

> +           return GS_ALL_DONE;
> +         }
> +       break;
> +      }
> +
>      default:;
>      }
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index a28ca13385..3daec85aef 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -1662,9 +1662,8 @@ Name(fp_contract_mode) Type(enum fp_contract_mode) UnknownError(unknown floating
>  EnumValue
>  Enum(fp_contract_mode) String(off) Value(FP_CONTRACT_OFF)
>
> -; Not implemented, fall back to conservative FP_CONTRACT_OFF.
>  EnumValue
> -Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_OFF)
> +Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_ON)
>
>  EnumValue
>  Enum(fp_contract_mode) String(fast) Value(FP_CONTRACT_FAST)
> diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
> index 4622dba012..5cb1795482 100644
> --- a/gcc/config/sh/sh.md
> +++ b/gcc/config/sh/sh.md
> @@ -9269,7 +9269,7 @@ (define_insn_and_split "*fmasf4"
>                  (match_operand:SF 3 "arith_reg_operand" "0")))
>     (clobber (reg:SI FPSCR_STAT_REG))
>     (use (reg:SI FPSCR_MODES_REG))]
> -  "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
> +  "TARGET_SH2E && flag_fp_contract_mode == FP_CONTRACT_FAST"
>    "fmac        %1,%2,%0"
>    "&& can_create_pseudo_p ()"
>    [(parallel [(set (match_dup 0)
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index b92b857602..cb1e9a1d9f 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -11983,10 +11983,12 @@ This option is enabled by default at optimization levels @option{-O1},
>  such as forming of fused multiply-add operations if the target has
>  native support for them.
>  @option{-ffp-contract=on} enables floating-point expression contraction
> -if allowed by the language standard.  This is currently not implemented
> -and treated equal to @option{-ffp-contract=off}.
> +if allowed by the language standard.  This is implemented for C and C++,
> +where it enables contraction within one expression, but not across
> +different statements.
>
> -The default is @option{-ffp-contract=fast}.
> +The default is @option{-ffp-contract=off} for C in a standards compliant mode
> +(@option{-std=c11} or similar), @option{-ffp-contract=fast} otherwise.
>
>  @opindex fomit-frame-pointer
>  @item -fomit-frame-pointer
> diff --git a/gcc/trans-mem.cc b/gcc/trans-mem.cc
> index 4b129663e0..2174faef4c 100644
> --- a/gcc/trans-mem.cc
> +++ b/gcc/trans-mem.cc
> @@ -637,6 +637,9 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi, bool *handled_ops_p,
>      {
>      case GIMPLE_CALL:
>        {
> +       if (gimple_call_internal_p (stmt))
> +         break;
> +
>         tree fn = gimple_call_fn (stmt);
>
>         if ((d->summary_flags & DIAG_TM_OUTER) == 0
> --
> 2.39.2
>
  
Alexander Monakov May 22, 2023, 3:16 p.m. UTC | #2
On Mon, 22 May 2023, Richard Biener wrote:

> On Thu, May 18, 2023 at 11:04 PM Alexander Monakov via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Implement -ffp-contract=on for C and C++ without changing default
> > behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).
> 
> The documentation changes mention the defaults are changed for
> standard modes, I suppose you want to remove that hunk.

No, the current documentation is incomplete, and that hunk extends it
to match the current GCC behavior. Should I break it out to a separate
patch? I see this drive-by fix could look confusing — sorry about that.

> it would be possible to do
> 
>   *expr_p = build_call_expr_internal (ifn, type, ops[0], ops[1], ops[2]);
>   return GS_OK;
> 
> and not worry about temporary creation and gimplifying of the operands.
> That would in theory also leave the possibility to do this during
> genericization instead (and avoid the guard against late invocation of
> the hook).

Ah, no, I deliberately decided against that, because that way we would go
via gimplify_arg, which would emit all side effects in *pre_p. That seems
wrong if arguments had side-effects that should go in *post_p.

Thanks.
Alexander

> Otherwise it looks OK, but I'll let frontend maintainers have a chance to look
> as well.
> 
> Thanks for tackling this long-standing issue.
> Richard.
  
Richard Biener May 23, 2023, 6:05 a.m. UTC | #3
On Mon, May 22, 2023 at 5:16 PM Alexander Monakov <amonakov@ispras.ru> wrote:
>
>
> On Mon, 22 May 2023, Richard Biener wrote:
>
> > On Thu, May 18, 2023 at 11:04 PM Alexander Monakov via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Implement -ffp-contract=on for C and C++ without changing default
> > > behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).
> >
> > The documentation changes mention the defaults are changed for
> > standard modes, I suppose you want to remove that hunk.
>
> No, the current documentation is incomplete, and that hunk extends it
> to match the current GCC behavior. Should I break it out to a separate
> patch? I see this drive-by fix could look confusing — sorry about that.
>
> > it would be possible to do
> >
> >   *expr_p = build_call_expr_internal (ifn, type, ops[0], ops[1], ops[2]);
> >   return GS_OK;
> >
> > and not worry about temporary creation and gimplifying of the operands.
> > That would in theory also leave the possibility to do this during
> > genericization instead (and avoid the guard against late invocation of
> > the hook).
>
> Ah, no, I deliberately decided against that, because that way we would go
> via gimplify_arg, which would emit all side effects in *pre_p. That seems
> wrong if arguments had side-effects that should go in *post_p.

Ah, true - that warrants a comment though.

Richard.

>
> Thanks.
> Alexander
>
> > Otherwise it looks OK, but I'll let frontend maintainers have a chance to look
> > as well.
> >
> > Thanks for tackling this long-standing issue.
> > Richard.
  
Alexander Monakov May 23, 2023, 11:21 a.m. UTC | #4
On Tue, 23 May 2023, Richard Biener wrote:
> > Ah, no, I deliberately decided against that, because that way we would go
> > via gimplify_arg, which would emit all side effects in *pre_p. That seems
> > wrong if arguments had side-effects that should go in *post_p.
> 
> Ah, true - that warrants a comment though.

Incrementally fixed up in my tree like this:

diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
index f7635d3b0c..17b0610a89 100644
--- a/gcc/c-family/c-gimplify.cc
+++ b/gcc/c-family/c-gimplify.cc
@@ -803,6 +803,7 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
                else
                  ops[2] = build1 (NEGATE_EXPR, type, ops[2]);
              }
+           /* Avoid gimplify_arg: it emits all side effects into *PRE_P.  */
            for (auto &&op : ops)
              if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
                  == GS_ERROR)

Alexander
  
Alexander Monakov June 5, 2023, 3:33 p.m. UTC | #5
Ping for the front-end maintainers' input.

On Mon, 22 May 2023, Richard Biener wrote:

> On Thu, May 18, 2023 at 11:04 PM Alexander Monakov via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Implement -ffp-contract=on for C and C++ without changing default
> > behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).
> 
> The documentation changes mention the defaults are changed for
> standard modes, I suppose you want to remove that hunk.
> 
> > gcc/c-family/ChangeLog:
> >
> >         * c-gimplify.cc (fma_supported_p): New helper.
> >         (c_gimplify_expr) [PLUS_EXPR, MINUS_EXPR]: Implement FMA
> >         contraction.
> >
> > gcc/ChangeLog:
> >
> >         * common.opt (fp_contract_mode) [on]: Remove fallback.
> >         * config/sh/sh.md (*fmasf4): Correct flag_fp_contract_mode test.
> >         * doc/invoke.texi (-ffp-contract): Update.
> >         * trans-mem.cc (diagnose_tm_1): Skip internal function calls.
> > ---
> >  gcc/c-family/c-gimplify.cc | 78 ++++++++++++++++++++++++++++++++++++++
> >  gcc/common.opt             |  3 +-
> >  gcc/config/sh/sh.md        |  2 +-
> >  gcc/doc/invoke.texi        |  8 ++--
> >  gcc/trans-mem.cc           |  3 ++
> >  5 files changed, 88 insertions(+), 6 deletions(-)
> >
> > diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
> > index ef5c7d919f..f7635d3b0c 100644
> > --- a/gcc/c-family/c-gimplify.cc
> > +++ b/gcc/c-family/c-gimplify.cc
> > @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "c-ubsan.h"
> >  #include "tree-nested.h"
> >  #include "context.h"
> > +#include "tree-pass.h"
> > +#include "internal-fn.h"
> >
> >  /*  The gimplification pass converts the language-dependent trees
> >      (ld-trees) emitted by the parser into language-independent trees
> > @@ -686,6 +688,14 @@ c_build_bind_expr (location_t loc, tree block, tree body)
> >    return bind;
> >  }
> >
> > +/* Helper for c_gimplify_expr: test if target supports fma-like FN.  */
> > +
> > +static bool
> > +fma_supported_p (enum internal_fn fn, tree type)
> > +{
> > +  return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
> > +}
> > +
> >  /* Gimplification of expression trees.  */
> >
> >  /* Do C-specific gimplification on *EXPR_P.  PRE_P and POST_P are as in
> > @@ -739,6 +749,74 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
> >         break;
> >        }
> >
> > +    case PLUS_EXPR:
> > +    case MINUS_EXPR:
> > +      {
> > +       tree type = TREE_TYPE (*expr_p);
> > +       /* For -ffp-contract=on we need to attempt FMA contraction only
> > +          during initial gimplification.  Late contraction across statement
> > +          boundaries would violate language semantics.  */
> > +       if (SCALAR_FLOAT_TYPE_P (type)
> > +           && flag_fp_contract_mode == FP_CONTRACT_ON
> > +           && cfun && !(cfun->curr_properties & PROP_gimple_any)
> > +           && fma_supported_p (IFN_FMA, type))
> > +         {
> > +           bool neg_mul = false, neg_add = code == MINUS_EXPR;
> > +
> > +           tree *op0_p = &TREE_OPERAND (*expr_p, 0);
> > +           tree *op1_p = &TREE_OPERAND (*expr_p, 1);
> > +
> > +           /* Look for ±(x * y) ± z, swapping operands if necessary.  */
> > +           if (TREE_CODE (*op0_p) == NEGATE_EXPR
> > +               && TREE_CODE (TREE_OPERAND (*op0_p, 0)) == MULT_EXPR)
> > +             /* '*EXPR_P' is '-(x * y) ± z'.  This is fine.  */;
> > +           else if (TREE_CODE (*op0_p) != MULT_EXPR)
> > +             {
> > +               std::swap (op0_p, op1_p);
> > +               std::swap (neg_mul, neg_add);
> > +             }
> > +           if (TREE_CODE (*op0_p) == NEGATE_EXPR)
> > +             {
> > +               op0_p = &TREE_OPERAND (*op0_p, 0);
> > +               neg_mul = !neg_mul;
> > +             }
> > +           if (TREE_CODE (*op0_p) != MULT_EXPR)
> > +             break;
> > +           auto_vec<tree, 3> ops (3);
> > +           ops.quick_push (TREE_OPERAND (*op0_p, 0));
> > +           ops.quick_push (TREE_OPERAND (*op0_p, 1));
> > +           ops.quick_push (*op1_p);
> > +
> > +           enum internal_fn ifn = IFN_FMA;
> > +           if (neg_mul)
> > +             {
> > +               if (fma_supported_p (IFN_FNMA, type))
> > +                 ifn = IFN_FNMA;
> > +               else
> > +                 ops[0] = build1 (NEGATE_EXPR, type, ops[0]);
> > +             }
> > +           if (neg_add)
> > +             {
> > +               enum internal_fn ifn2 = ifn == IFN_FMA ? IFN_FMS : IFN_FNMS;
> > +               if (fma_supported_p (ifn2, type))
> > +                 ifn = ifn2;
> > +               else
> > +                 ops[2] = build1 (NEGATE_EXPR, type, ops[2]);
> > +             }
> > +           for (auto &&op : ops)
> > +             if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
> > +                 == GS_ERROR)
> > +               return GS_ERROR;
> > +
> > +           gcall *call = gimple_build_call_internal_vec (ifn, ops);
> > +           gimple_seq_add_stmt_without_update (pre_p, call);
> > +           *expr_p = create_tmp_var (type);
> > +           gimple_call_set_lhs (call, *expr_p);
> 
> it would be possible to do
> 
>   *expr_p = build_call_expr_internal (ifn, type, ops[0], ops[1], ops[2]);
>   return GS_OK;
> 
> and not worry about temporary creation and gimplifying of the operands.
> That would in theory also leave the possibility to do this during
> genericization instead (and avoid the guard against late invocation of
> the hook).
> 
> Otherwise it looks OK, but I'll let frontend maintainers have a chance to look
> as well.
> 
> Thanks for tackling this long-standing issue.
> Richard.
> 
> > +           return GS_ALL_DONE;
> > +         }
> > +       break;
> > +      }
> > +
> >      default:;
> >      }
> >
> > diff --git a/gcc/common.opt b/gcc/common.opt
> > index a28ca13385..3daec85aef 100644
> > --- a/gcc/common.opt
> > +++ b/gcc/common.opt
> > @@ -1662,9 +1662,8 @@ Name(fp_contract_mode) Type(enum fp_contract_mode) UnknownError(unknown floating
> >  EnumValue
> >  Enum(fp_contract_mode) String(off) Value(FP_CONTRACT_OFF)
> >
> > -; Not implemented, fall back to conservative FP_CONTRACT_OFF.
> >  EnumValue
> > -Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_OFF)
> > +Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_ON)
> >
> >  EnumValue
> >  Enum(fp_contract_mode) String(fast) Value(FP_CONTRACT_FAST)
> > diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
> > index 4622dba012..5cb1795482 100644
> > --- a/gcc/config/sh/sh.md
> > +++ b/gcc/config/sh/sh.md
> > @@ -9269,7 +9269,7 @@ (define_insn_and_split "*fmasf4"
> >                  (match_operand:SF 3 "arith_reg_operand" "0")))
> >     (clobber (reg:SI FPSCR_STAT_REG))
> >     (use (reg:SI FPSCR_MODES_REG))]
> > -  "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
> > +  "TARGET_SH2E && flag_fp_contract_mode == FP_CONTRACT_FAST"
> >    "fmac        %1,%2,%0"
> >    "&& can_create_pseudo_p ()"
> >    [(parallel [(set (match_dup 0)
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index b92b857602..cb1e9a1d9f 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -11983,10 +11983,12 @@ This option is enabled by default at optimization levels @option{-O1},
> >  such as forming of fused multiply-add operations if the target has
> >  native support for them.
> >  @option{-ffp-contract=on} enables floating-point expression contraction
> > -if allowed by the language standard.  This is currently not implemented
> > -and treated equal to @option{-ffp-contract=off}.
> > +if allowed by the language standard.  This is implemented for C and C++,
> > +where it enables contraction within one expression, but not across
> > +different statements.
> >
> > -The default is @option{-ffp-contract=fast}.
> > +The default is @option{-ffp-contract=off} for C in a standards compliant mode
> > +(@option{-std=c11} or similar), @option{-ffp-contract=fast} otherwise.
> >
> >  @opindex fomit-frame-pointer
> >  @item -fomit-frame-pointer
> > diff --git a/gcc/trans-mem.cc b/gcc/trans-mem.cc
> > index 4b129663e0..2174faef4c 100644
> > --- a/gcc/trans-mem.cc
> > +++ b/gcc/trans-mem.cc
> > @@ -637,6 +637,9 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi, bool *handled_ops_p,
> >      {
> >      case GIMPLE_CALL:
> >        {
> > +       if (gimple_call_internal_p (stmt))
> > +         break;
> > +
> >         tree fn = gimple_call_fn (stmt);
> >
> >         if ((d->summary_flags & DIAG_TM_OUTER) == 0
> > --
> > 2.39.2
> >
>
  
Alexander Monakov June 19, 2023, 5:03 p.m. UTC | #6
Ping. OK for trunk?

On Mon, 5 Jun 2023, Alexander Monakov wrote:

> Ping for the front-end maintainers' input.
> 
> On Mon, 22 May 2023, Richard Biener wrote:
> 
> > On Thu, May 18, 2023 at 11:04 PM Alexander Monakov via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Implement -ffp-contract=on for C and C++ without changing default
> > > behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).
> > 
> > The documentation changes mention the defaults are changed for
> > standard modes, I suppose you want to remove that hunk.
> > 
> > > gcc/c-family/ChangeLog:
> > >
> > >         * c-gimplify.cc (fma_supported_p): New helper.
> > >         (c_gimplify_expr) [PLUS_EXPR, MINUS_EXPR]: Implement FMA
> > >         contraction.
> > >
> > > gcc/ChangeLog:
> > >
> > >         * common.opt (fp_contract_mode) [on]: Remove fallback.
> > >         * config/sh/sh.md (*fmasf4): Correct flag_fp_contract_mode test.
> > >         * doc/invoke.texi (-ffp-contract): Update.
> > >         * trans-mem.cc (diagnose_tm_1): Skip internal function calls.
> > > ---
> > >  gcc/c-family/c-gimplify.cc | 78 ++++++++++++++++++++++++++++++++++++++
> > >  gcc/common.opt             |  3 +-
> > >  gcc/config/sh/sh.md        |  2 +-
> > >  gcc/doc/invoke.texi        |  8 ++--
> > >  gcc/trans-mem.cc           |  3 ++
> > >  5 files changed, 88 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
> > > index ef5c7d919f..f7635d3b0c 100644
> > > --- a/gcc/c-family/c-gimplify.cc
> > > +++ b/gcc/c-family/c-gimplify.cc
> > > @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
> > >  #include "c-ubsan.h"
> > >  #include "tree-nested.h"
> > >  #include "context.h"
> > > +#include "tree-pass.h"
> > > +#include "internal-fn.h"
> > >
> > >  /*  The gimplification pass converts the language-dependent trees
> > >      (ld-trees) emitted by the parser into language-independent trees
> > > @@ -686,6 +688,14 @@ c_build_bind_expr (location_t loc, tree block, tree body)
> > >    return bind;
> > >  }
> > >
> > > +/* Helper for c_gimplify_expr: test if target supports fma-like FN.  */
> > > +
> > > +static bool
> > > +fma_supported_p (enum internal_fn fn, tree type)
> > > +{
> > > +  return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
> > > +}
> > > +
> > >  /* Gimplification of expression trees.  */
> > >
> > >  /* Do C-specific gimplification on *EXPR_P.  PRE_P and POST_P are as in
> > > @@ -739,6 +749,74 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
> > >         break;
> > >        }
> > >
> > > +    case PLUS_EXPR:
> > > +    case MINUS_EXPR:
> > > +      {
> > > +       tree type = TREE_TYPE (*expr_p);
> > > +       /* For -ffp-contract=on we need to attempt FMA contraction only
> > > +          during initial gimplification.  Late contraction across statement
> > > +          boundaries would violate language semantics.  */
> > > +       if (SCALAR_FLOAT_TYPE_P (type)
> > > +           && flag_fp_contract_mode == FP_CONTRACT_ON
> > > +           && cfun && !(cfun->curr_properties & PROP_gimple_any)
> > > +           && fma_supported_p (IFN_FMA, type))
> > > +         {
> > > +           bool neg_mul = false, neg_add = code == MINUS_EXPR;
> > > +
> > > +           tree *op0_p = &TREE_OPERAND (*expr_p, 0);
> > > +           tree *op1_p = &TREE_OPERAND (*expr_p, 1);
> > > +
> > > +           /* Look for ±(x * y) ± z, swapping operands if necessary.  */
> > > +           if (TREE_CODE (*op0_p) == NEGATE_EXPR
> > > +               && TREE_CODE (TREE_OPERAND (*op0_p, 0)) == MULT_EXPR)
> > > +             /* '*EXPR_P' is '-(x * y) ± z'.  This is fine.  */;
> > > +           else if (TREE_CODE (*op0_p) != MULT_EXPR)
> > > +             {
> > > +               std::swap (op0_p, op1_p);
> > > +               std::swap (neg_mul, neg_add);
> > > +             }
> > > +           if (TREE_CODE (*op0_p) == NEGATE_EXPR)
> > > +             {
> > > +               op0_p = &TREE_OPERAND (*op0_p, 0);
> > > +               neg_mul = !neg_mul;
> > > +             }
> > > +           if (TREE_CODE (*op0_p) != MULT_EXPR)
> > > +             break;
> > > +           auto_vec<tree, 3> ops (3);
> > > +           ops.quick_push (TREE_OPERAND (*op0_p, 0));
> > > +           ops.quick_push (TREE_OPERAND (*op0_p, 1));
> > > +           ops.quick_push (*op1_p);
> > > +
> > > +           enum internal_fn ifn = IFN_FMA;
> > > +           if (neg_mul)
> > > +             {
> > > +               if (fma_supported_p (IFN_FNMA, type))
> > > +                 ifn = IFN_FNMA;
> > > +               else
> > > +                 ops[0] = build1 (NEGATE_EXPR, type, ops[0]);
> > > +             }
> > > +           if (neg_add)
> > > +             {
> > > +               enum internal_fn ifn2 = ifn == IFN_FMA ? IFN_FMS : IFN_FNMS;
> > > +               if (fma_supported_p (ifn2, type))
> > > +                 ifn = ifn2;
> > > +               else
> > > +                 ops[2] = build1 (NEGATE_EXPR, type, ops[2]);
> > > +             }
> > > +           for (auto &&op : ops)
> > > +             if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
> > > +                 == GS_ERROR)
> > > +               return GS_ERROR;
> > > +
> > > +           gcall *call = gimple_build_call_internal_vec (ifn, ops);
> > > +           gimple_seq_add_stmt_without_update (pre_p, call);
> > > +           *expr_p = create_tmp_var (type);
> > > +           gimple_call_set_lhs (call, *expr_p);
> > 
> > it would be possible to do
> > 
> > >   *expr_p = build_call_expr_internal (ifn, type, ops[0], ops[1], ops[2]);
> >   return GS_OK;
> > 
> > and not worry about temporary creation and gimplifying of the operands.
> > That would in theory also leave the possibility to do this during
> > genericization instead (and avoid the guard against late invocation of
> > the hook).
> > 
> > Otherwise it looks OK, but I'll let frontend maintainers have a chance to look
> > as well.
> > 
> > Thanks for tackling this long-standing issue.
> > Richard.
> > 
> > > +           return GS_ALL_DONE;
> > > +         }
> > > +       break;
> > > +      }
> > > +
> > >      default:;
> > >      }
> > >
> > > diff --git a/gcc/common.opt b/gcc/common.opt
> > > index a28ca13385..3daec85aef 100644
> > > --- a/gcc/common.opt
> > > +++ b/gcc/common.opt
> > > @@ -1662,9 +1662,8 @@ Name(fp_contract_mode) Type(enum fp_contract_mode) UnknownError(unknown floating
> > >  EnumValue
> > >  Enum(fp_contract_mode) String(off) Value(FP_CONTRACT_OFF)
> > >
> > > -; Not implemented, fall back to conservative FP_CONTRACT_OFF.
> > >  EnumValue
> > > -Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_OFF)
> > > +Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_ON)
> > >
> > >  EnumValue
> > >  Enum(fp_contract_mode) String(fast) Value(FP_CONTRACT_FAST)
> > > diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
> > > index 4622dba012..5cb1795482 100644
> > > --- a/gcc/config/sh/sh.md
> > > +++ b/gcc/config/sh/sh.md
> > > @@ -9269,7 +9269,7 @@ (define_insn_and_split "*fmasf4"
> > >                  (match_operand:SF 3 "arith_reg_operand" "0")))
> > >     (clobber (reg:SI FPSCR_STAT_REG))
> > >     (use (reg:SI FPSCR_MODES_REG))]
> > > -  "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
> > > +  "TARGET_SH2E && flag_fp_contract_mode == FP_CONTRACT_FAST"
> > >    "fmac        %1,%2,%0"
> > >    "&& can_create_pseudo_p ()"
> > >    [(parallel [(set (match_dup 0)
> > > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > > index b92b857602..cb1e9a1d9f 100644
> > > --- a/gcc/doc/invoke.texi
> > > +++ b/gcc/doc/invoke.texi
> > > @@ -11983,10 +11983,12 @@ This option is enabled by default at optimization levels @option{-O1},
> > >  such as forming of fused multiply-add operations if the target has
> > >  native support for them.
> > >  @option{-ffp-contract=on} enables floating-point expression contraction
> > > -if allowed by the language standard.  This is currently not implemented
> > > -and treated equal to @option{-ffp-contract=off}.
> > > +if allowed by the language standard.  This is implemented for C and C++,
> > > +where it enables contraction within one expression, but not across
> > > +different statements.
> > >
> > > -The default is @option{-ffp-contract=fast}.
> > > +The default is @option{-ffp-contract=off} for C in a standards compliant mode
> > > +(@option{-std=c11} or similar), @option{-ffp-contract=fast} otherwise.
> > >
> > >  @opindex fomit-frame-pointer
> > >  @item -fomit-frame-pointer
> > > diff --git a/gcc/trans-mem.cc b/gcc/trans-mem.cc
> > > index 4b129663e0..2174faef4c 100644
> > > --- a/gcc/trans-mem.cc
> > > +++ b/gcc/trans-mem.cc
> > > @@ -637,6 +637,9 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi, bool *handled_ops_p,
> > >      {
> > >      case GIMPLE_CALL:
> > >        {
> > > +       if (gimple_call_internal_p (stmt))
> > > +         break;
> > > +
> > >         tree fn = gimple_call_fn (stmt);
> > >
> > >         if ((d->summary_flags & DIAG_TM_OUTER) == 0
> > > --
> > > 2.39.2
> > >
> >
  
Richard Biener June 19, 2023, 6:23 p.m. UTC | #7
> Am 19.06.2023 um 19:03 schrieb Alexander Monakov <amonakov@ispras.ru>:
> 
> 
> Ping. OK for trunk?

Ok if the FE maintainers do not object within 48h.

Thanks,
Richard 

>> On Mon, 5 Jun 2023, Alexander Monakov wrote:
>> 
>> Ping for the front-end maintainers' input.
>> 
>>> On Mon, 22 May 2023, Richard Biener wrote:
>>> 
>>> On Thu, May 18, 2023 at 11:04 PM Alexander Monakov via Gcc-patches
>>> <gcc-patches@gcc.gnu.org> wrote:
>>>> 
>>>> Implement -ffp-contract=on for C and C++ without changing default
>>>> behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).
>>> 
>>> The documentation changes mention the defaults are changed for
>>> standard modes, I suppose you want to remove that hunk.
>>> 
>>>> gcc/c-family/ChangeLog:
>>>> 
>>>>        * c-gimplify.cc (fma_supported_p): New helper.
>>>>        (c_gimplify_expr) [PLUS_EXPR, MINUS_EXPR]: Implement FMA
>>>>        contraction.
>>>> 
>>>> gcc/ChangeLog:
>>>> 
>>>>        * common.opt (fp_contract_mode) [on]: Remove fallback.
>>>>        * config/sh/sh.md (*fmasf4): Correct flag_fp_contract_mode test.
>>>>        * doc/invoke.texi (-ffp-contract): Update.
>>>>        * trans-mem.cc (diagnose_tm_1): Skip internal function calls.
>>>> ---
>>>> gcc/c-family/c-gimplify.cc | 78 ++++++++++++++++++++++++++++++++++++++
>>>> gcc/common.opt             |  3 +-
>>>> gcc/config/sh/sh.md        |  2 +-
>>>> gcc/doc/invoke.texi        |  8 ++--
>>>> gcc/trans-mem.cc           |  3 ++
>>>> 5 files changed, 88 insertions(+), 6 deletions(-)
>>>> 
>>>> diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
>>>> index ef5c7d919f..f7635d3b0c 100644
>>>> --- a/gcc/c-family/c-gimplify.cc
>>>> +++ b/gcc/c-family/c-gimplify.cc
>>>> @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
>>>> #include "c-ubsan.h"
>>>> #include "tree-nested.h"
>>>> #include "context.h"
>>>> +#include "tree-pass.h"
>>>> +#include "internal-fn.h"
>>>> 
>>>> /*  The gimplification pass converts the language-dependent trees
>>>>     (ld-trees) emitted by the parser into language-independent trees
>>>> @@ -686,6 +688,14 @@ c_build_bind_expr (location_t loc, tree block, tree body)
>>>>   return bind;
>>>> }
>>>> 
>>>> +/* Helper for c_gimplify_expr: test if target supports fma-like FN.  */
>>>> +
>>>> +static bool
>>>> +fma_supported_p (enum internal_fn fn, tree type)
>>>> +{
>>>> +  return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
>>>> +}
>>>> +
>>>> /* Gimplification of expression trees.  */
>>>> 
>>>> /* Do C-specific gimplification on *EXPR_P.  PRE_P and POST_P are as in
>>>> @@ -739,6 +749,74 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
>>>>        break;
>>>>       }
>>>> 
>>>> +    case PLUS_EXPR:
>>>> +    case MINUS_EXPR:
>>>> +      {
>>>> +       tree type = TREE_TYPE (*expr_p);
>>>> +       /* For -ffp-contract=on we need to attempt FMA contraction only
>>>> +          during initial gimplification.  Late contraction across statement
>>>> +          boundaries would violate language semantics.  */
>>>> +       if (SCALAR_FLOAT_TYPE_P (type)
>>>> +           && flag_fp_contract_mode == FP_CONTRACT_ON
>>>> +           && cfun && !(cfun->curr_properties & PROP_gimple_any)
>>>> +           && fma_supported_p (IFN_FMA, type))
>>>> +         {
>>>> +           bool neg_mul = false, neg_add = code == MINUS_EXPR;
>>>> +
>>>> +           tree *op0_p = &TREE_OPERAND (*expr_p, 0);
>>>> +           tree *op1_p = &TREE_OPERAND (*expr_p, 1);
>>>> +
>>>> +           /* Look for ±(x * y) ± z, swapping operands if necessary.  */
>>>> +           if (TREE_CODE (*op0_p) == NEGATE_EXPR
>>>> +               && TREE_CODE (TREE_OPERAND (*op0_p, 0)) == MULT_EXPR)
>>>> +             /* '*EXPR_P' is '-(x * y) ± z'.  This is fine.  */;
>>>> +           else if (TREE_CODE (*op0_p) != MULT_EXPR)
>>>> +             {
>>>> +               std::swap (op0_p, op1_p);
>>>> +               std::swap (neg_mul, neg_add);
>>>> +             }
>>>> +           if (TREE_CODE (*op0_p) == NEGATE_EXPR)
>>>> +             {
>>>> +               op0_p = &TREE_OPERAND (*op0_p, 0);
>>>> +               neg_mul = !neg_mul;
>>>> +             }
>>>> +           if (TREE_CODE (*op0_p) != MULT_EXPR)
>>>> +             break;
>>>> +           auto_vec<tree, 3> ops (3);
>>>> +           ops.quick_push (TREE_OPERAND (*op0_p, 0));
>>>> +           ops.quick_push (TREE_OPERAND (*op0_p, 1));
>>>> +           ops.quick_push (*op1_p);
>>>> +
>>>> +           enum internal_fn ifn = IFN_FMA;
>>>> +           if (neg_mul)
>>>> +             {
>>>> +               if (fma_supported_p (IFN_FNMA, type))
>>>> +                 ifn = IFN_FNMA;
>>>> +               else
>>>> +                 ops[0] = build1 (NEGATE_EXPR, type, ops[0]);
>>>> +             }
>>>> +           if (neg_add)
>>>> +             {
>>>> +               enum internal_fn ifn2 = ifn == IFN_FMA ? IFN_FMS : IFN_FNMS;
>>>> +               if (fma_supported_p (ifn2, type))
>>>> +                 ifn = ifn2;
>>>> +               else
>>>> +                 ops[2] = build1 (NEGATE_EXPR, type, ops[2]);
>>>> +             }
>>>> +           for (auto &&op : ops)
>>>> +             if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
>>>> +                 == GS_ERROR)
>>>> +               return GS_ERROR;
>>>> +
>>>> +           gcall *call = gimple_build_call_internal_vec (ifn, ops);
>>>> +           gimple_seq_add_stmt_without_update (pre_p, call);
>>>> +           *expr_p = create_tmp_var (type);
>>>> +           gimple_call_set_lhs (call, *expr_p);
>>> 
>>> it would be possible to do
>>> 
>>>  *expr_p = build_call_expr_internal (ifn, type, ops[0], ops[1], ops[2]);
>>>  return GS_OK;
>>> 
>>> and not worry about temporary creation and gimplifying of the operands.
>>> That would in theory also leave the possibility to do this during
>>> genericization instead (and avoid the guard against late invocation of
>>> the hook).
>>> 
>>> Otherwise it looks OK, but I'll let frontend maintainers have a chance to look
>>> as well.
>>> 
>>> Thanks for tackling this long-standing issue.
>>> Richard.
>>> 
>>>> +           return GS_ALL_DONE;
>>>> +         }
>>>> +       break;
>>>> +      }
>>>> +
>>>>     default:;
>>>>     }
>>>> 
>>>> diff --git a/gcc/common.opt b/gcc/common.opt
>>>> index a28ca13385..3daec85aef 100644
>>>> --- a/gcc/common.opt
>>>> +++ b/gcc/common.opt
>>>> @@ -1662,9 +1662,8 @@ Name(fp_contract_mode) Type(enum fp_contract_mode) UnknownError(unknown floating
>>>> EnumValue
>>>> Enum(fp_contract_mode) String(off) Value(FP_CONTRACT_OFF)
>>>> 
>>>> -; Not implemented, fall back to conservative FP_CONTRACT_OFF.
>>>> EnumValue
>>>> -Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_OFF)
>>>> +Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_ON)
>>>> 
>>>> EnumValue
>>>> Enum(fp_contract_mode) String(fast) Value(FP_CONTRACT_FAST)
>>>> diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
>>>> index 4622dba012..5cb1795482 100644
>>>> --- a/gcc/config/sh/sh.md
>>>> +++ b/gcc/config/sh/sh.md
>>>> @@ -9269,7 +9269,7 @@ (define_insn_and_split "*fmasf4"
>>>>                 (match_operand:SF 3 "arith_reg_operand" "0")))
>>>>    (clobber (reg:SI FPSCR_STAT_REG))
>>>>    (use (reg:SI FPSCR_MODES_REG))]
>>>> -  "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
>>>> +  "TARGET_SH2E && flag_fp_contract_mode == FP_CONTRACT_FAST"
>>>>   "fmac        %1,%2,%0"
>>>>   "&& can_create_pseudo_p ()"
>>>>   [(parallel [(set (match_dup 0)
>>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>>>> index b92b857602..cb1e9a1d9f 100644
>>>> --- a/gcc/doc/invoke.texi
>>>> +++ b/gcc/doc/invoke.texi
>>>> @@ -11983,10 +11983,12 @@ This option is enabled by default at optimization levels @option{-O1},
>>>> such as forming of fused multiply-add operations if the target has
>>>> native support for them.
>>>> @option{-ffp-contract=on} enables floating-point expression contraction
>>>> -if allowed by the language standard.  This is currently not implemented
>>>> -and treated equal to @option{-ffp-contract=off}.
>>>> +if allowed by the language standard.  This is implemented for C and C++,
>>>> +where it enables contraction within one expression, but not across
>>>> +different statements.
>>>> 
>>>> -The default is @option{-ffp-contract=fast}.
>>>> +The default is @option{-ffp-contract=off} for C in a standards compliant mode
>>>> +(@option{-std=c11} or similar), @option{-ffp-contract=fast} otherwise.
>>>> 
>>>> @opindex fomit-frame-pointer
>>>> @item -fomit-frame-pointer
>>>> diff --git a/gcc/trans-mem.cc b/gcc/trans-mem.cc
>>>> index 4b129663e0..2174faef4c 100644
>>>> --- a/gcc/trans-mem.cc
>>>> +++ b/gcc/trans-mem.cc
>>>> @@ -637,6 +637,9 @@ diagnose_tm_1 (gimple_stmt_iterator *gsi, bool *handled_ops_p,
>>>>     {
>>>>     case GIMPLE_CALL:
>>>>       {
>>>> +       if (gimple_call_internal_p (stmt))
>>>> +         break;
>>>> +
>>>>        tree fn = gimple_call_fn (stmt);
>>>> 
>>>>        if ((d->summary_flags & DIAG_TM_OUTER) == 0
>>>> --
>>>> 2.39.2
>>>>
  

Patch

diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
index ef5c7d919f..f7635d3b0c 100644
--- a/gcc/c-family/c-gimplify.cc
+++ b/gcc/c-family/c-gimplify.cc
@@ -41,6 +41,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "c-ubsan.h"
 #include "tree-nested.h"
 #include "context.h"
+#include "tree-pass.h"
+#include "internal-fn.h"
 
 /*  The gimplification pass converts the language-dependent trees
     (ld-trees) emitted by the parser into language-independent trees
@@ -686,6 +688,14 @@  c_build_bind_expr (location_t loc, tree block, tree body)
   return bind;
 }
 
+/* Helper for c_gimplify_expr: test if target supports fma-like FN.  */
+
+static bool
+fma_supported_p (enum internal_fn fn, tree type)
+{
+  return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
+}
+
 /* Gimplification of expression trees.  */
 
 /* Do C-specific gimplification on *EXPR_P.  PRE_P and POST_P are as in
@@ -739,6 +749,74 @@  c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
 	break;
       }
 
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+      {
+	tree type = TREE_TYPE (*expr_p);
+	/* For -ffp-contract=on we need to attempt FMA contraction only
+	   during initial gimplification.  Late contraction across statement
+	   boundaries would violate language semantics.  */
+	if (SCALAR_FLOAT_TYPE_P (type)
+	    && flag_fp_contract_mode == FP_CONTRACT_ON
+	    && cfun && !(cfun->curr_properties & PROP_gimple_any)
+	    && fma_supported_p (IFN_FMA, type))
+	  {
+	    bool neg_mul = false, neg_add = code == MINUS_EXPR;
+
+	    tree *op0_p = &TREE_OPERAND (*expr_p, 0);
+	    tree *op1_p = &TREE_OPERAND (*expr_p, 1);
+
+	    /* Look for ±(x * y) ± z, swapping operands if necessary.  */
+	    if (TREE_CODE (*op0_p) == NEGATE_EXPR
+		&& TREE_CODE (TREE_OPERAND (*op0_p, 0)) == MULT_EXPR)
+	      /* '*EXPR_P' is '-(x * y) ± z'.  This is fine.  */;
+	    else if (TREE_CODE (*op0_p) != MULT_EXPR)
+	      {
+		std::swap (op0_p, op1_p);
+		std::swap (neg_mul, neg_add);
+	      }
+	    if (TREE_CODE (*op0_p) == NEGATE_EXPR)
+	      {
+		op0_p = &TREE_OPERAND (*op0_p, 0);
+		neg_mul = !neg_mul;
+	      }
+	    if (TREE_CODE (*op0_p) != MULT_EXPR)
+	      break;
+	    auto_vec<tree, 3> ops (3);
+	    ops.quick_push (TREE_OPERAND (*op0_p, 0));
+	    ops.quick_push (TREE_OPERAND (*op0_p, 1));
+	    ops.quick_push (*op1_p);
+
+	    enum internal_fn ifn = IFN_FMA;
+	    if (neg_mul)
+	      {
+		if (fma_supported_p (IFN_FNMA, type))
+		  ifn = IFN_FNMA;
+		else
+		  ops[0] = build1 (NEGATE_EXPR, type, ops[0]);
+	      }
+	    if (neg_add)
+	      {
+		enum internal_fn ifn2 = ifn == IFN_FMA ? IFN_FMS : IFN_FNMS;
+		if (fma_supported_p (ifn2, type))
+		  ifn = ifn2;
+		else
+		  ops[2] = build1 (NEGATE_EXPR, type, ops[2]);
+	      }
+	    for (auto &&op : ops)
+	      if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
+		  == GS_ERROR)
+		return GS_ERROR;
+
+	    gcall *call = gimple_build_call_internal_vec (ifn, ops);
+	    gimple_seq_add_stmt_without_update (pre_p, call);
+	    *expr_p = create_tmp_var (type);
+	    gimple_call_set_lhs (call, *expr_p);
+	    return GS_ALL_DONE;
+	  }
+	break;
+      }
+
     default:;
     }
 
diff --git a/gcc/common.opt b/gcc/common.opt
index a28ca13385..3daec85aef 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1662,9 +1662,8 @@  Name(fp_contract_mode) Type(enum fp_contract_mode) UnknownError(unknown floating
 EnumValue
 Enum(fp_contract_mode) String(off) Value(FP_CONTRACT_OFF)
 
-; Not implemented, fall back to conservative FP_CONTRACT_OFF.
 EnumValue
-Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_OFF)
+Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_ON)
 
 EnumValue
 Enum(fp_contract_mode) String(fast) Value(FP_CONTRACT_FAST)
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 4622dba012..5cb1795482 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -9269,7 +9269,7 @@  (define_insn_and_split "*fmasf4"
 		 (match_operand:SF 3 "arith_reg_operand" "0")))
    (clobber (reg:SI FPSCR_STAT_REG))
    (use (reg:SI FPSCR_MODES_REG))]
-  "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
+  "TARGET_SH2E && flag_fp_contract_mode == FP_CONTRACT_FAST"
   "fmac	%1,%2,%0"
   "&& can_create_pseudo_p ()"
   [(parallel [(set (match_dup 0)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b92b857602..cb1e9a1d9f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11983,10 +11983,12 @@  This option is enabled by default at optimization levels @option{-O1},
 such as forming of fused multiply-add operations if the target has
 native support for them.
 @option{-ffp-contract=on} enables floating-point expression contraction
-if allowed by the language standard.  This is currently not implemented
-and treated equal to @option{-ffp-contract=off}.
+if allowed by the language standard.  This is implemented for C and C++,
+where it enables contraction within one expression, but not across
+different statements.
 
-The default is @option{-ffp-contract=fast}.
+The default is @option{-ffp-contract=off} for C in a standards compliant mode
+(@option{-std=c11} or similar), @option{-ffp-contract=fast} otherwise.
 
 @opindex fomit-frame-pointer
 @item -fomit-frame-pointer
diff --git a/gcc/trans-mem.cc b/gcc/trans-mem.cc
index 4b129663e0..2174faef4c 100644
--- a/gcc/trans-mem.cc
+++ b/gcc/trans-mem.cc
@@ -637,6 +637,9 @@  diagnose_tm_1 (gimple_stmt_iterator *gsi, bool *handled_ops_p,
     {
     case GIMPLE_CALL:
       {
+	if (gimple_call_internal_p (stmt))
+	  break;
+
 	tree fn = gimple_call_fn (stmt);
 
 	if ((d->summary_flags & DIAG_TM_OUTER) == 0