[committed] d: Fix gdc -O2 -mavx generates misaligned vmovdqa instruction [PR114171]
Checks
Commit Message
Hi,
This patch fixes a wrong code issue in the D front-end where lowered
struct comparisons would reinterpret fields with a different (usually
bigger) alignment than the original. Use `build_aligned_type' to
preserve the alignment when casting away for such comparisons.
Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
to mainline, and backported to releases/gcc-13, releases/gcc-12, and
releases/gcc-11.
Regards,
Iain.
---
PR d/114171
gcc/d/ChangeLog:
* d-codegen.cc (lower_struct_comparison): Keep alignment of original
type in reinterpret cast for comparison.
gcc/testsuite/ChangeLog:
* gdc.dg/torture/pr114171.d: New test.
---
gcc/d/d-codegen.cc | 1 +
gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++
2 files changed, 30 insertions(+)
create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d
Comments
> Am 03.03.2024 um 02:51 schrieb Iain Buclaw <ibuclaw@gdcproject.org>:
>
> Hi,
>
> This patch fixes a wrong code issue in the D front-end where lowered
> struct comparisons would reinterpret fields with a different (usually
> bigger) alignment than the original. Use `build_aligned_type' to
> preserve the alignment when casting away for such comparisons.
>
> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
> releases/gcc-11.
LGTM. You might want to experiment with not doing the premature optimization but instead use __builtin_memcmp_eq (assuming that’s a general good fit). The middle-end should better turn that into target optimal code.
Richard
> Regards,
> Iain.
> ---
> PR d/114171
>
> gcc/d/ChangeLog:
>
> * d-codegen.cc (lower_struct_comparison): Keep alignment of original
> type in reinterpret cast for comparison.
>
> gcc/testsuite/ChangeLog:
>
> * gdc.dg/torture/pr114171.d: New test.
> ---
> gcc/d/d-codegen.cc | 1 +
> gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++
> 2 files changed, 30 insertions(+)
> create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d
>
> diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
> index 5bc233928aa..43d7739f8fc 100644
> --- a/gcc/d/d-codegen.cc
> +++ b/gcc/d/d-codegen.cc
> @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd,
> if (tmode == NULL_TREE)
> tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
>
> + tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));
> t1ref = build_vconvert (tmode, t1ref);
> t2ref = build_vconvert (tmode, t2ref);
>
> diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d b/gcc/testsuite/gdc.dg/torture/pr114171.d
> new file mode 100644
> index 00000000000..0f9ffcab916
> --- /dev/null
> +++ b/gcc/testsuite/gdc.dg/torture/pr114171.d
> @@ -0,0 +1,29 @@
> +// { dg-do run }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
> +import gcc.builtins;
> +
> +struct S1
> +{
> + string label;
> +}
> +
> +struct S2
> +{
> + ulong pad;
> + S1 label;
> +}
> +
> +pragma(inline, false)
> +auto newitem()
> +{
> + void *p = __builtin_malloc(S2.sizeof);
> + __builtin_memset(p, 0, S2.sizeof);
> + return cast(S2*) p;
> +}
> +
> +int main()
> +{
> + auto bn = newitem();
> + return bn.label is S1.init ? 0 : 1;
> +}
> --
> 2.40.1
>
Excerpts from Richard Biener's message of März 3, 2024 11:41 am:
>
>
>> Am 03.03.2024 um 02:51 schrieb Iain Buclaw <ibuclaw@gdcproject.org>:
>>
>> Hi,
>>
>> This patch fixes a wrong code issue in the D front-end where lowered
>> struct comparisons would reinterpret fields with a different (usually
>> bigger) alignment than the original. Use `build_aligned_type' to
>> preserve the alignment when casting away for such comparisons.
>>
>> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
>> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
>> releases/gcc-11.
>
> LGTM. You might want to experiment with not doing the premature optimization but instead use __builtin_memcmp_eq (assuming that’s a general good fit). The middle-end should better turn that into target optimal code.
>
Indeed, just looking at the history, it was introduced well over ten
years ago so I can't comment on the original context for it (it doesn't
directly fix any old issues).
Small comparison between this optimization and memcmp_eq
---
_5 = newitem ();
# DEBUG bn => _5
_1 = MEM[(ucent *)_5 + 8B];
_2 = _1 != 0;
_6 = (int) _2;
return _6;
---
call _D8pr1141717newitemFNbNiZPSQz2S2
.LVL29:
vmovdqu 8(%rax), %xmm0
xorl %eax, %eax
.LVL30:
vptest %xmm0, %xmm0
setne %al
addq $8, %rsp
ret
---
_6 = newitem ();
# DEBUG bn => _6
D.2335.length = 0;
D.2335.ptr = 0B;
_1 = &_6->label.label;
_2 = __builtin_memcmp_eq (_1, &D.2335, 16);
_3 = _2 != 0;
_9 = (int) _3;
return _9;
---
call _D8pr1141717newitemFNbNiZPSQz2S2
.LVL29:
movq $0, (%rsp)
movq $0, 8(%rsp)
vmovdqu 8(%rax), %xmm0
xorl %eax, %eax
.LVL30:
vpxor (%rsp), %xmm0, %xmm0
vptest %xmm0, %xmm0
setne %al
addq $24, %rsp
ret
On Sat, Mar 2, 2024 at 5:51 PM Iain Buclaw <ibuclaw@gdcproject.org> wrote:
>
> Hi,
>
> This patch fixes a wrong code issue in the D front-end where lowered
> struct comparisons would reinterpret fields with a different (usually
> bigger) alignment than the original. Use `build_aligned_type' to
> preserve the alignment when casting away for such comparisons.
>
> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
> releases/gcc-11.
>
> Regards,
> Iain.
> ---
> PR d/114171
>
> gcc/d/ChangeLog:
>
> * d-codegen.cc (lower_struct_comparison): Keep alignment of original
> type in reinterpret cast for comparison.
>
> gcc/testsuite/ChangeLog:
>
> * gdc.dg/torture/pr114171.d: New test.
> ---
> gcc/d/d-codegen.cc | 1 +
> gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++
> 2 files changed, 30 insertions(+)
> create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d
>
> diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
> index 5bc233928aa..43d7739f8fc 100644
> --- a/gcc/d/d-codegen.cc
> +++ b/gcc/d/d-codegen.cc
> @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd,
> if (tmode == NULL_TREE)
> tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
>
> + tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));
You might also need to build a may_alias variant too. Or make sure the
access is using the correct aliasing set.
I have not checked if the D front-end does anything special for
aliasing sets so I am not sure if that is needed or not but I suspect
it is.
Thanks,
Andrew Pinski
> t1ref = build_vconvert (tmode, t1ref);
> t2ref = build_vconvert (tmode, t2ref);
>
> diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d b/gcc/testsuite/gdc.dg/torture/pr114171.d
> new file mode 100644
> index 00000000000..0f9ffcab916
> --- /dev/null
> +++ b/gcc/testsuite/gdc.dg/torture/pr114171.d
> @@ -0,0 +1,29 @@
> +// { dg-do run }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
> +import gcc.builtins;
> +
> +struct S1
> +{
> + string label;
> +}
> +
> +struct S2
> +{
> + ulong pad;
> + S1 label;
> +}
> +
> +pragma(inline, false)
> +auto newitem()
> +{
> + void *p = __builtin_malloc(S2.sizeof);
> + __builtin_memset(p, 0, S2.sizeof);
> + return cast(S2*) p;
> +}
> +
> +int main()
> +{
> + auto bn = newitem();
> + return bn.label is S1.init ? 0 : 1;
> +}
> --
> 2.40.1
>
Excerpts from Andrew Pinski's message of März 3, 2024 11:49 pm:
> On Sat, Mar 2, 2024 at 5:51 PM Iain Buclaw <ibuclaw@gdcproject.org> wrote:
>>
>> Hi,
>>
>> This patch fixes a wrong code issue in the D front-end where lowered
>> struct comparisons would reinterpret fields with a different (usually
>> bigger) alignment than the original. Use `build_aligned_type' to
>> preserve the alignment when casting away for such comparisons.
>>
>> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
>> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
>> releases/gcc-11.
>>
>> Regards,
>> Iain.
>> ---
>> PR d/114171
>>
>> gcc/d/ChangeLog:
>>
>> * d-codegen.cc (lower_struct_comparison): Keep alignment of original
>> type in reinterpret cast for comparison.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gdc.dg/torture/pr114171.d: New test.
>> ---
>> gcc/d/d-codegen.cc | 1 +
>> gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++
>> 2 files changed, 30 insertions(+)
>> create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d
>>
>> diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
>> index 5bc233928aa..43d7739f8fc 100644
>> --- a/gcc/d/d-codegen.cc
>> +++ b/gcc/d/d-codegen.cc
>> @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd,
>> if (tmode == NULL_TREE)
>> tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
>>
>> + tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));
>
> You might also need to build a may_alias variant too. Or make sure the
> access is using the correct aliasing set.
> I have not checked if the D front-end does anything special for
> aliasing sets so I am not sure if that is needed or not but I suspect
> it is.
>
There are no alias sets defined in the D front-end - the reference
compiler doesn't enforce it, which has allowed enough code out there
to expect modifying bits (eg: of a float) through a reinterpret cast
(such as an int*) to just work.
Thanks for the reminder though.
Iain.
@@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd,
if (tmode == NULL_TREE)
tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
+ tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));
t1ref = build_vconvert (tmode, t1ref);
t2ref = build_vconvert (tmode, t2ref);
new file mode 100644
@@ -0,0 +1,29 @@
+// { dg-do run }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.builtins;
+
+struct S1
+{
+ string label;
+}
+
+struct S2
+{
+ ulong pad;
+ S1 label;
+}
+
+pragma(inline, false)
+auto newitem()
+{
+ void *p = __builtin_malloc(S2.sizeof);
+ __builtin_memset(p, 0, S2.sizeof);
+ return cast(S2*) p;
+}
+
+int main()
+{
+ auto bn = newitem();
+ return bn.label is S1.init ? 0 : 1;
+}