lower-bitint: Avoid overlap between destinations and sources in libgcc calls [PR113421]
Checks
Commit Message
Hi!
The following testcase is miscompiled because the bitint lowering emits a
.MULBITINT (&a, 1024, &a, 1024, &x, 1024);
call. The bug is in the overlap between the destination and source, that is
something the libgcc routines don't handle, they use the source arrays
during the entire algorithms which computes the destination array(s).
For the mapping of SSA_NAMEs to VAR_DECLs the code already supports that
correctly, but the checking whether a load from memory can be used directly
without a temporary even when earlier we decided to merge the
multiplication/division/modulo etc. with a store didn't.
The following patch implements that.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2024-01-17 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/113421
* gimple-lower-bitint.cc (stmt_needs_operand_addr): Adjust function
comment.
(bitint_dom_walker::before_dom_children): Add g temporary to simplify
formatting. Start at vop rather than cvop even if stmt is a store
and needs_operand_addr.
* gcc.dg/torture/bitint-50.c: New test.
Jakub
Comments
On Wed, 17 Jan 2024, Jakub Jelinek wrote:
> Hi!
>
> The following testcase is miscompiled because the bitint lowering emits a
> .MULBITINT (&a, 1024, &a, 1024, &x, 1024);
> call. The bug is in the overlap between the destination and source, that is
> something the libgcc routines don't handle, they use the source arrays
> during the entire algorithms which computes the destination array(s).
> For the mapping of SSA_NAMEs to VAR_DECLs the code already supports that
> correctly, but the checking whether a load from memory can be used directly
> without a temporary even when earlier we decided to merge the
> multiplication/division/modulo etc. with a store didn't.
>
> The following patch implements that.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.
> 2024-01-17 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/113421
> * gimple-lower-bitint.cc (stmt_needs_operand_addr): Adjust function
> comment.
> (bitint_dom_walker::before_dom_children): Add g temporary to simplify
> formatting. Start at vop rather than cvop even if stmt is a store
> and needs_operand_addr.
>
> * gcc.dg/torture/bitint-50.c: New test.
>
> --- gcc/gimple-lower-bitint.cc.jj 2024-01-16 12:32:56.617721208 +0100
> +++ gcc/gimple-lower-bitint.cc 2024-01-16 17:33:04.046476302 +0100
> @@ -5455,7 +5455,8 @@ vuse_eq (ao_ref *, tree vuse1, void *dat
>
> /* Return true if STMT uses a library function and needs to take
> address of its inputs. We need to avoid bit-fields in those
> - cases. */
> + cases. Similarly, we need to avoid overlap between destination
> + and source limb arrays. */
>
> bool
> stmt_needs_operand_addr (gimple *stmt)
> @@ -5574,7 +5575,8 @@ bitint_dom_walker::before_dom_children (
> else if (!bitmap_bit_p (m_loads, SSA_NAME_VERSION (s)))
> continue;
>
> - tree rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (s));
> + gimple *g = SSA_NAME_DEF_STMT (s);
> + tree rhs1 = gimple_assign_rhs1 (g);
> if (needs_operand_addr
> && TREE_CODE (rhs1) == COMPONENT_REF
> && DECL_BIT_FIELD_TYPE (TREE_OPERAND (rhs1, 1)))
> @@ -5596,15 +5598,14 @@ bitint_dom_walker::before_dom_children (
>
> ao_ref ref;
> ao_ref_init (&ref, rhs1);
> - tree lvop = gimple_vuse (SSA_NAME_DEF_STMT (s));
> + tree lvop = gimple_vuse (g);
> unsigned limit = 64;
> tree vuse = cvop;
> if (vop != cvop
> && is_gimple_assign (stmt)
> && gimple_store_p (stmt)
> - && !operand_equal_p (lhs,
> - gimple_assign_rhs1 (SSA_NAME_DEF_STMT (s)),
> - 0))
> + && (needs_operand_addr
> + || !operand_equal_p (lhs, gimple_assign_rhs1 (g), 0)))
> vuse = vop;
> if (vuse != lvop
> && walk_non_aliased_vuses (&ref, vuse, false, vuse_eq,
> --- gcc/testsuite/gcc.dg/torture/bitint-50.c.jj 2024-01-16 17:35:16.084622119 +0100
> +++ gcc/testsuite/gcc.dg/torture/bitint-50.c 2024-01-16 17:35:06.701753879 +0100
> @@ -0,0 +1,31 @@
> +/* PR tree-optimization/113421 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23 -pedantic-errors" } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +#if __BITINT_MAXWIDTH__ >= 1024
> +unsigned _BitInt(1024) a = -5wb;
> +
> +__attribute__((noipa)) void
> +foo (unsigned _BitInt(1024) x)
> +{
> + a *= x;
> +}
> +#else
> +int a = 30;
> +
> +void
> +foo (int)
> +{
> +}
> +#endif
> +
> +int
> +main ()
> +{
> + foo (-6wb);
> + if (a != 30wb)
> + __builtin_abort ();
> + return 0;
> +}
>
> Jakub
>
>
@@ -5455,7 +5455,8 @@ vuse_eq (ao_ref *, tree vuse1, void *dat
/* Return true if STMT uses a library function and needs to take
address of its inputs. We need to avoid bit-fields in those
- cases. */
+ cases. Similarly, we need to avoid overlap between destination
+ and source limb arrays. */
bool
stmt_needs_operand_addr (gimple *stmt)
@@ -5574,7 +5575,8 @@ bitint_dom_walker::before_dom_children (
else if (!bitmap_bit_p (m_loads, SSA_NAME_VERSION (s)))
continue;
- tree rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (s));
+ gimple *g = SSA_NAME_DEF_STMT (s);
+ tree rhs1 = gimple_assign_rhs1 (g);
if (needs_operand_addr
&& TREE_CODE (rhs1) == COMPONENT_REF
&& DECL_BIT_FIELD_TYPE (TREE_OPERAND (rhs1, 1)))
@@ -5596,15 +5598,14 @@ bitint_dom_walker::before_dom_children (
ao_ref ref;
ao_ref_init (&ref, rhs1);
- tree lvop = gimple_vuse (SSA_NAME_DEF_STMT (s));
+ tree lvop = gimple_vuse (g);
unsigned limit = 64;
tree vuse = cvop;
if (vop != cvop
&& is_gimple_assign (stmt)
&& gimple_store_p (stmt)
- && !operand_equal_p (lhs,
- gimple_assign_rhs1 (SSA_NAME_DEF_STMT (s)),
- 0))
+ && (needs_operand_addr
+ || !operand_equal_p (lhs, gimple_assign_rhs1 (g), 0)))
vuse = vop;
if (vuse != lvop
&& walk_non_aliased_vuses (&ref, vuse, false, vuse_eq,
@@ -0,0 +1,31 @@
+/* PR tree-optimization/113421 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23 -pedantic-errors" } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 1024
+unsigned _BitInt(1024) a = -5wb;
+
+__attribute__((noipa)) void
+foo (unsigned _BitInt(1024) x)
+{
+ a *= x;
+}
+#else
+int a = 30;
+
+void
+foo (int)
+{
+}
+#endif
+
+int
+main ()
+{
+ foo (-6wb);
+ if (a != 30wb)
+ __builtin_abort ();
+ return 0;
+}