c-family, c++, v2: Fix up handling of types which may have padding in __atomic_{compare_}exchange
Checks
Commit Message
On Tue, Feb 20, 2024 at 09:01:10AM +0100, Richard Biener wrote:
> I'm not sure those would be really equivalent (MEM_REF vs. V_C_E
> as well as combined vs. split). It really depends how RTL expansion
> handles this (as you can see padding can be fun here).
>
> So I'd be nervous for a match.pd rule here (also we can't match
> memory defs).
Ok. Perhaps forwprop then; anyway, that would be an optimization.
> As for your patch I'd go with a MEM_REF unconditionally, I don't
> think we want different behavior whether there's padding or not ...
I've made it conditional so that the MEM_REFs don't appear that often in the
FE trees, but maybe that is fine.
The unconditional patch would then be:
2024-02-20 Jakub Jelinek <jakub@redhat.com>
gcc/c-family/
* c-common.cc (resolve_overloaded_atomic_exchange): Instead of setting
p1 to VIEW_CONVERT_EXPR<I_type> (*p1), set it to MEM_REF with p1 and
(typeof (p1)) 0 operands and I_type type.
(resolve_overloaded_atomic_compare_exchange): Similarly for p2.
gcc/cp/
* pt.cc (tsubst_expr): Handle MEM_REF.
gcc/testsuite/
* g++.dg/ext/atomic-5.C: New test.
Jakub
Comments
On Tue, 20 Feb 2024, Jakub Jelinek wrote:
> On Tue, Feb 20, 2024 at 09:01:10AM +0100, Richard Biener wrote:
> > I'm not sure those would be really equivalent (MEM_REF vs. V_C_E
> > as well as combined vs. split). It really depends how RTL expansion
> > handles this (as you can see padding can be fun here).
> >
> > So I'd be nervous for a match.pd rule here (also we can't match
> > memory defs).
>
> Ok. Perhaps forwprop then; anyway, that would be an optimization.
>
> > As for your patch I'd go with a MEM_REF unconditionally, I don't
> > think we want different behavior whether there's padding or not ...
>
> I've made it conditional so that the MEM_REFs don't appear that often in the
> FE trees, but maybe that is fine.
>
> The unconditional patch would then be:
>
> 2024-02-20 Jakub Jelinek <jakub@redhat.com>
>
> gcc/c-family/
> * c-common.cc (resolve_overloaded_atomic_exchange): Instead of setting
> p1 to VIEW_CONVERT_EXPR<I_type> (*p1), set it to MEM_REF with p1 and
> (typeof (p1)) 0 operands and I_type type.
> (resolve_overloaded_atomic_compare_exchange): Similarly for p2.
> gcc/cp/
> * pt.cc (tsubst_expr): Handle MEM_REF.
> gcc/testsuite/
> * g++.dg/ext/atomic-5.C: New test.
>
> --- gcc/c-family/c-common.cc.jj 2024-02-17 16:40:42.831571693 +0100
> +++ gcc/c-family/c-common.cc 2024-02-20 10:58:56.599865656 +0100
> @@ -7793,9 +7793,14 @@ resolve_overloaded_atomic_exchange (loca
> /* Convert object pointer to required type. */
> p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0);
> (*params)[0] = p0;
> - /* Convert new value to required type, and dereference it. */
> - p1 = build_indirect_ref (loc, p1, RO_UNARY_STAR);
> - p1 = build1 (VIEW_CONVERT_EXPR, I_type, p1);
> + /* Convert new value to required type, and dereference it.
> + If *p1 type can have padding or may involve floating point which
> + could e.g. be promoted to wider precision and demoted afterwards,
> + state of padding bits might not be preserved. */
> + build_indirect_ref (loc, p1, RO_UNARY_STAR);
> + p1 = build2_loc (loc, MEM_REF, I_type,
> + build1 (VIEW_CONVERT_EXPR, I_type_ptr, p1),
Why the V_C_E to I_type_ptr? The type of p1 doesn't
really matter (unless it could be a non-pointer).
Also note that I_type needs to be properly address-space qualified
in case the access should be to an address-space. Formerly with
the INDIRECT_REF that would likely be automagic.
> + build_zero_cst (TREE_TYPE (p1)));
> (*params)[1] = p1;
>
> /* Move memory model to the 3rd position, and end param list. */
> @@ -7873,9 +7878,14 @@ resolve_overloaded_atomic_compare_exchan
> p1 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p1);
> (*params)[1] = p1;
>
> - /* Convert desired value to required type, and dereference it. */
> - p2 = build_indirect_ref (loc, p2, RO_UNARY_STAR);
> - p2 = build1 (VIEW_CONVERT_EXPR, I_type, p2);
> + /* Convert desired value to required type, and dereference it.
> + If *p2 type can have padding or may involve floating point which
> + could e.g. be promoted to wider precision and demoted afterwards,
> + state of padding bits might not be preserved. */
> + build_indirect_ref (loc, p2, RO_UNARY_STAR);
> + p2 = build2_loc (loc, MEM_REF, I_type,
> + build1 (VIEW_CONVERT_EXPR, I_type_ptr, p2),
> + build_zero_cst (TREE_TYPE (p2)));
> (*params)[2] = p2;
>
> /* The rest of the parameters are fine. NULL means no special return value
> --- gcc/cp/pt.cc.jj 2024-02-17 16:40:42.868571182 +0100
> +++ gcc/cp/pt.cc 2024-02-20 10:57:36.646973603 +0100
> @@ -20088,6 +20088,14 @@ tsubst_expr (tree t, tree args, tsubst_f
> RETURN (r);
> }
>
> + case MEM_REF:
> + {
> + tree op0 = RECUR (TREE_OPERAND (t, 0));
> + tree op1 = RECUR (TREE_OPERAND (t, 0));
> + tree new_type = tsubst (TREE_TYPE (t), args, complain, in_decl);
> + RETURN (build2_loc (EXPR_LOCATION (t), MEM_REF, new_type, op0, op1));
> + }
> +
> case NOP_EXPR:
> {
> tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
> --- gcc/testsuite/g++.dg/ext/atomic-5.C.jj 2024-02-20 10:57:36.647973589 +0100
> +++ gcc/testsuite/g++.dg/ext/atomic-5.C 2024-02-20 10:57:36.647973589 +0100
> @@ -0,0 +1,42 @@
> +// { dg-do compile { target c++14 } }
> +
> +template <int N>
> +void
> +foo (long double *ptr, long double *val, long double *ret)
> +{
> + __atomic_exchange (ptr, val, ret, __ATOMIC_RELAXED);
> +}
> +
> +template <int N>
> +bool
> +bar (long double *ptr, long double *exp, long double *des)
> +{
> + return __atomic_compare_exchange (ptr, exp, des, false,
> + __ATOMIC_RELAXED, __ATOMIC_RELAXED);
> +}
> +
> +bool
> +baz (long double *p, long double *q, long double *r)
> +{
> + foo<0> (p, q, r);
> + foo<1> (p + 1, q + 1, r + 1);
> + return bar<0> (p + 2, q + 2, r + 2) || bar<1> (p + 3, q + 3, r + 3);
> +}
> +
> +constexpr int
> +qux (long double *ptr, long double *val, long double *ret)
> +{
> + __atomic_exchange (ptr, val, ret, __ATOMIC_RELAXED);
> + return 0;
> +}
> +
> +constexpr bool
> +corge (long double *ptr, long double *exp, long double *des)
> +{
> + return __atomic_compare_exchange (ptr, exp, des, false,
> + __ATOMIC_RELAXED, __ATOMIC_RELAXED);
> +}
> +
> +long double a[6];
> +const int b = qux (a, a + 1, a + 2);
> +const bool c = corge (a + 3, a + 4, a + 5);
>
>
> Jakub
>
>
On Tue, Feb 20, 2024 at 11:11:22AM +0100, Richard Biener wrote:
> > --- gcc/c-family/c-common.cc.jj 2024-02-17 16:40:42.831571693 +0100
> > +++ gcc/c-family/c-common.cc 2024-02-20 10:58:56.599865656 +0100
> > @@ -7793,9 +7793,14 @@ resolve_overloaded_atomic_exchange (loca
> > /* Convert object pointer to required type. */
> > p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0);
> > (*params)[0] = p0;
> > - /* Convert new value to required type, and dereference it. */
> > - p1 = build_indirect_ref (loc, p1, RO_UNARY_STAR);
> > - p1 = build1 (VIEW_CONVERT_EXPR, I_type, p1);
> > + /* Convert new value to required type, and dereference it.
> > + If *p1 type can have padding or may involve floating point which
> > + could e.g. be promoted to wider precision and demoted afterwards,
> > + state of padding bits might not be preserved. */
> > + build_indirect_ref (loc, p1, RO_UNARY_STAR);
> > + p1 = build2_loc (loc, MEM_REF, I_type,
> > + build1 (VIEW_CONVERT_EXPR, I_type_ptr, p1),
>
> Why the V_C_E to I_type_ptr? The type of p1 doesn't
> really matter (unless it could be a non-pointer).
Just to help the FE when trying to constexpr evaluate it, because
for constexpr evaluation it evaluates MEM_REF the same as INDIRECT_REF
(and punts on non-0 second argument). The actual call is non-constexpr,
just wanted to avoid ICEs or something similar, constexpr evaluation can
try to process the arguments (and succeed or fail, doesn't matter,
but not ICE) and then the call will not be constant expression and so
everything won't be.
> Also note that I_type needs to be properly address-space qualified
> in case the access should be to an address-space. Formerly with
> the INDIRECT_REF that would likely be automagic.
I don't think using __atomic_*exchange on non-default as is valid,
if one doesn't have the exact __atomic_*exchange_N, it is handled
as a library call which is passed pointers and that definitely will
not deal with non-default address spaces.
Furthermore, the type should be the same in all arguments, and
the first argument is just converted to I_type_ptr and dealt with later, so
I don't think it ever worked even for the supported sizes.
int
foo (__seg_gs int *a, __seg_gs int *b, __seg_gs int *c)
{
return __atomic_compare_exchange (a, b, c, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
results in
movl (%rsi), %eax
movl %gs:(%rdx), %edx
lock cmpxchgl %edx, (%rdi)
sete %dl
je .L2
movl %eax, (%rsi)
.L2:
i.e. pretty much random what is loaded from a different AS and what is not.
Jakub
@@ -7793,9 +7793,14 @@ resolve_overloaded_atomic_exchange (loca
/* Convert object pointer to required type. */
p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0);
(*params)[0] = p0;
- /* Convert new value to required type, and dereference it. */
- p1 = build_indirect_ref (loc, p1, RO_UNARY_STAR);
- p1 = build1 (VIEW_CONVERT_EXPR, I_type, p1);
+ /* Convert new value to required type, and dereference it.
+ If *p1 type can have padding or may involve floating point which
+ could e.g. be promoted to wider precision and demoted afterwards,
+ state of padding bits might not be preserved. */
+ build_indirect_ref (loc, p1, RO_UNARY_STAR);
+ p1 = build2_loc (loc, MEM_REF, I_type,
+ build1 (VIEW_CONVERT_EXPR, I_type_ptr, p1),
+ build_zero_cst (TREE_TYPE (p1)));
(*params)[1] = p1;
/* Move memory model to the 3rd position, and end param list. */
@@ -7873,9 +7878,14 @@ resolve_overloaded_atomic_compare_exchan
p1 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p1);
(*params)[1] = p1;
- /* Convert desired value to required type, and dereference it. */
- p2 = build_indirect_ref (loc, p2, RO_UNARY_STAR);
- p2 = build1 (VIEW_CONVERT_EXPR, I_type, p2);
+ /* Convert desired value to required type, and dereference it.
+ If *p2 type can have padding or may involve floating point which
+ could e.g. be promoted to wider precision and demoted afterwards,
+ state of padding bits might not be preserved. */
+ build_indirect_ref (loc, p2, RO_UNARY_STAR);
+ p2 = build2_loc (loc, MEM_REF, I_type,
+ build1 (VIEW_CONVERT_EXPR, I_type_ptr, p2),
+ build_zero_cst (TREE_TYPE (p2)));
(*params)[2] = p2;
/* The rest of the parameters are fine. NULL means no special return value
@@ -20088,6 +20088,14 @@ tsubst_expr (tree t, tree args, tsubst_f
RETURN (r);
}
+ case MEM_REF:
+ {
+ tree op0 = RECUR (TREE_OPERAND (t, 0));
+ tree op1 = RECUR (TREE_OPERAND (t, 0));
+ tree new_type = tsubst (TREE_TYPE (t), args, complain, in_decl);
+ RETURN (build2_loc (EXPR_LOCATION (t), MEM_REF, new_type, op0, op1));
+ }
+
case NOP_EXPR:
{
tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
@@ -0,0 +1,42 @@
+// { dg-do compile { target c++14 } }
+
+template <int N>
+void
+foo (long double *ptr, long double *val, long double *ret)
+{
+ __atomic_exchange (ptr, val, ret, __ATOMIC_RELAXED);
+}
+
+template <int N>
+bool
+bar (long double *ptr, long double *exp, long double *des)
+{
+ return __atomic_compare_exchange (ptr, exp, des, false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+}
+
+bool
+baz (long double *p, long double *q, long double *r)
+{
+ foo<0> (p, q, r);
+ foo<1> (p + 1, q + 1, r + 1);
+ return bar<0> (p + 2, q + 2, r + 2) || bar<1> (p + 3, q + 3, r + 3);
+}
+
+constexpr int
+qux (long double *ptr, long double *val, long double *ret)
+{
+ __atomic_exchange (ptr, val, ret, __ATOMIC_RELAXED);
+ return 0;
+}
+
+constexpr bool
+corge (long double *ptr, long double *exp, long double *des)
+{
+ return __atomic_compare_exchange (ptr, exp, des, false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+}
+
+long double a[6];
+const int b = qux (a, a + 1, a + 2);
+const bool c = corge (a + 3, a + 4, a + 5);