libgcc, v2: Fix up _BitInt division [PR113604]
Checks
Commit Message
Hi!
On Sat, Jan 27, 2024 at 08:53:42AM +0100, Jakub Jelinek wrote:
> The following testcase ends up with SIGFPE in __divmodbitint4.
> The problem is a thinko in my attempt to implement Knuth's algorithm.
Here is an updated version of the patch, the libgcc part is the same,
but I've added a new testcase which tests the behavior of all the changed
cases, so it has a case where uv1:uv0 / vv1 is 1:1, where it is
1:0 and rhat + vv1 overflows and where it is 1:0 and rhat + vv1 does not
overflow, and includes tests also from Zdenek's other failing tests.
Previously successfully bootstrapped/regtested on x86_64-linux and
i686-linux, the new version has been retested with
make check-gcc GCC_TEST_RUN_EXPENSIVE=1 RUNTESTFLAGS="GCC_TEST_RUN_EXPENSIVE=1 dg-torture.exp='bitint-53.c bitint-55.c'"
Ok for trunk?
2024-01-31 Jakub Jelinek <jakub@redhat.com>
PR libgcc/113604
* libgcc2.c (__divmodbitint4): If uv1 >= vv1, subtract
vv1 from uv1:uv0 once or twice as needed, rather than
subtracting vv1:vv1.
* gcc.dg/torture/bitint-53.c: New test.
* gcc.dg/torture/bitint-55.c: New test.
Jakub
Comments
On Wed, 31 Jan 2024, Jakub Jelinek wrote:
> Hi!
>
> On Sat, Jan 27, 2024 at 08:53:42AM +0100, Jakub Jelinek wrote:
> > The following testcase ends up with SIGFPE in __divmodbitint4.
> > The problem is a thinko in my attempt to implement Knuth's algorithm.
>
> Here is an updated version of the patch, the libgcc part is the same,
> but I've added a new testcase which tests the behavior of all the changed
> cases, so it has a case where uv1:uv0 / vv1 is 1:1, where it is
> 1:0 and rhat + vv1 overflows and where it is 1:0 and rhat + vv1 does not
> overflow, and includes tests also from Zdenek's other failing tests.
>
> Previously successfully bootstrapped/regtested on x86_64-linux and
> i686-linux, the new version has been retested with
> make check-gcc GCC_TEST_RUN_EXPENSIVE=1 RUNTESTFLAGS="GCC_TEST_RUN_EXPENSIVE=1 dg-torture.exp='bitint-53.c bitint-55.c'"
> Ok for trunk?
OK.
@@ -1863,12 +1863,46 @@ __divmodbitint4 (UBILtype *q, SItype qpr
if (uv1 >= vv1)
{
/* udiv_qrnnd doesn't support quotients which don't
- fit into UWtype, so subtract from uv1:uv0 vv1
- first. */
- uv1 -= vv1 + __builtin_sub_overflow (uv0, vv1, &uv0);
- udiv_qrnnd (qhat, rhat, uv1, uv0, vv1);
- if (!__builtin_add_overflow (rhat, vv1, &rhat))
- goto again;
+ fit into UWtype, while Knuth's algorithm originally
+ uses a double-word by word to double-word division.
+ Fortunately, the algorithm guarantees that uv1 <= vv1,
+ because if uv1 > vv1, then even if v would have all
+ bits in all words below vv1 set, the previous iteration
+ would be supposed to use qhat larger by 1 and subtract
+ v. With uv1 == vv1 and uv0 >= vv1 the double-word
+ qhat in Knuth's algorithm would be 1 in the upper word
+ and 1 in the lower word, say for
+ uv1 0x8000000000000000ULL
+ uv0 0xffffffffffffffffULL
+ vv1 0x8000000000000000ULL
+ 0x8000000000000000ffffffffffffffffuwb
+ / 0x8000000000000000uwb == 0x10000000000000001uwb, and
+ exactly like that also for any other value
+ > 0x8000000000000000ULL in uv1 and vv1 and uv0 >= uv1.
+ So we need to subtract one or at most two vv1s from
+ uv1:uv0 (qhat because of that decreases by 1 or 2 and
+ is then representable in UWtype) and need to increase
+ rhat by vv1 once or twice because of that. Now, if
+ we need to subtract 2 vv1s, i.e. if
+ uv1 == vv1 && uv0 >= vv1, then rhat (which is uv0 - vv1)
+ + vv1 computation can't overflow, because it is equal
+ to uv0 and therefore the original algorithm in that case
+ performs goto again, but the second vv1 addition must
+ overflow already because vv1 has msb set from the
+ canonicalization. */
+ uv1 -= __builtin_sub_overflow (uv0, vv1, &uv0);
+ if (uv1 >= vv1)
+ {
+ uv1 -= __builtin_sub_overflow (uv0, vv1, &uv0);
+ udiv_qrnnd (qhat, rhat, uv1, uv0, vv1);
+ rhat += 2 * vv1;
+ }
+ else
+ {
+ udiv_qrnnd (qhat, rhat, uv1, uv0, vv1);
+ if (!__builtin_add_overflow (rhat, vv1, &rhat))
+ goto again;
+ }
}
else
{
@@ -0,0 +1,26 @@
+/* PR libgcc/113604 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23 -pedantic-errors" } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 256
+unsigned _BitInt (256) x;
+
+void
+foo (unsigned _BitInt (256) a, unsigned _BitInt (128) b)
+{
+ x = a / b;
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 256
+ foo (0xfffffffffffffffffffffc0000000000000000000004uwb, 0x7ffffffffffffffffffffffffffuwb);
+ if (x != 0x1fffffffffffffffffuwb)
+ __builtin_abort ();
+#endif
+ return 0;
+}
@@ -0,0 +1,66 @@
+/* PR libgcc/113604 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23 -pedantic-errors" } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 513
+signed _BitInt(513)
+foo (signed _BitInt(513) x, signed _BitInt(513) y)
+{
+ return x % y;
+}
+#endif
+
+#if __BITINT_MAXWIDTH__ >= 512
+unsigned _BitInt(512)
+bar (unsigned _BitInt(512) x, unsigned _BitInt(512) y)
+{
+ return x % y;
+}
+#endif
+
+#if __BITINT_MAXWIDTH__ >= 256
+unsigned _BitInt(256)
+baz (unsigned _BitInt(256) x, unsigned _BitInt(256) y)
+{
+ return x % y;
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 513
+ if (foo (11155754932722990178552651944728825929130437979239421228991532051555943675wb,
+ 32783817256434357484609367438786815wb) != 0wb)
+ __builtin_abort ();
+ if (foo (542904728531209767665756029992981529373473101602268731408384wb,
+ 235447394450476261134537147263765988105wb)
+ != 235447394450476261116090403190056436489wb)
+ __builtin_abort ();
+ if (foo (542904728531209767665690117878483036079552477922114364506112wb,
+ 235447394450476261134537147263765988105wb)
+ != 169535279951982967195466723035689534217wb)
+ __builtin_abort ();
+ if (foo (542904728531209767665690117878483036079534031178040654954496wb,
+ 235447394450476261134537147263765988105wb)
+ != 169535279951982967177019978961979982601wb)
+ __builtin_abort ();
+ if (foo (542904728531209767665454670484032559818581851459155356811264wb,
+ 235447394450476261134537147263765988105wb)
+ != 169535279951982967359377407340447827474wb)
+ __builtin_abort ();
+#endif
+#if __BITINT_MAXWIDTH__ >= 512
+ if (bar (6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048uwb,
+ 170141183460469231731687303715884105735uwb) != 19208uwb)
+ __builtin_abort ();
+#endif
+#if __BITINT_MAXWIDTH__ >= 256
+ if (baz (115792089237316195423570985008687907853269984665640564039457584007913129639926uwb,
+ 68056473384187692692674921486353642292uwb) != 6uwb)
+ __builtin_abort ();
+#endif
+ return 0;
+}