i386: Fix up split_double_concat [PR109109]
Checks
Commit Message
Hi!
In my PR107627 change I missed one important case, which causes
miscompilation of f4 and f6 in the following tests.
Combine matches there *concatsidi3_3 define_insn_and_split (as with all
other f* functions in those tests), and RA ends up with:
(insn 11 10 17 2 (set (reg:DI 0 ax [89])
(ior:DI (ashift:DI (zero_extend:DI (mem:SI (plus:SI (mult:SI (reg:SI 0 ax [94])
(const_int 4 [0x4]))
(symbol_ref:SI ("arr") [flags 0x2] <var_decl 0x7f4e7fe4ccf0 arr>)) [1 arr[ax_6(D)]+0 S4 A32]))
(const_int 32 [0x20]))
(zero_extend:DI (reg:SI 1 dx [95])))) "pr109109-6.c":24:49 681 {*concatsidi3_3}
(nil))
split_double_concat turned that into:
movl arr(,%eax,4), %edx
movl %edx, %eax
which is incorrect, because the first instruction overwrites the input
%edx value that should be put into output %eax; the two insns can't be
swapped because the MEM's address uses %eax.
The following patch fixes that case to emit
movl arr(,%eax,4), %eax
xchgl %edx, %eax
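instead, i.e. the MEM is loaded into %eax (dlo) first and the two registers
are then swapped, so %eax ends up with the original %edx value and %edx
with the loaded value.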
Bootstrap/regtest on x86_64-linux and i686-linux pending, ok for trunk
if it passes on both?
2023-03-14 Jakub Jelinek <jakub@redhat.com>
PR target/109109
* config/i386/i386-expand.cc (split_double_concat): Fix splitting
when lo is equal to dhi and hi is a MEM which uses dlo register.
* gcc.target/i386/pr109109-1.c: New test.
* gcc.target/i386/pr109109-2.c: New test.
Jakub
Comments
On Tue, Mar 14, 2023 at 5:09 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> In my PR107627 change I've missed one important case, which causes
> miscompilation of f4 and f6 in the following tests.
>
> Combine matches there *concatsidi3_3 define_insn_and_split (as with all
> other f* functions in those tests), and RA ends up with:
> (insn 11 10 17 2 (set (reg:DI 0 ax [89])
> (ior:DI (ashift:DI (zero_extend:DI (mem:SI (plus:SI (mult:SI (reg:SI 0 ax [94])
> (const_int 4 [0x4]))
> (symbol_ref:SI ("arr") [flags 0x2] <var_decl 0x7f4e7fe4ccf0 arr>)) [1 arr[ax_6(D)]+0 S4 A32]))
> (const_int 32 [0x20]))
> (zero_extend:DI (reg:SI 1 dx [95])))) "pr109109-6.c":24:49 681 {*concatsidi3_3}
> (nil))
> split_double_concat turned that into:
> movl arr(,%eax,4), %edx
> movl %edx, %eax
> which is incorrect, because the first instruction overrides the input
> %edx value that should be put into output %eax; the two insns can't be
> swapped because the MEM's address uses %eax.
>
> The following patch fixes that case to emit
> movl arr(,%eax,4), %eax
> xchgl %edx, %eax
> instead.
>
> Bootstrap/regtest on x86_64-linux and i686-linux pending, ok for trunk
> if it passes on both?
>
> 2023-03-14 Jakub Jelinek <jakub@redhat.com>
>
> PR target/109109
> * config/i386/i386-expand.cc (split_double_concat): Fix splitting
> when lo is equal to dhi and hi is a MEM which uses dlo register.
>
> * gcc.target/i386/pr109109-1.c: New test.
> * gcc.target/i386/pr109109-2.c: New test.
OK.
Thanks,
Uros.
>
> --- gcc/config/i386/i386-expand.cc.jj 2023-02-18 12:39:58.334768946 +0100
> +++ gcc/config/i386/i386-expand.cc 2023-03-14 15:07:38.672919652 +0100
> @@ -197,9 +197,20 @@ split_double_concat (machine_mode mode,
> {
> /* In this case, code below would first emit_move_insn (dlo, lo)
> and then emit_move_insn (dhi, hi). But the former would
> - invalidate hi's address. Load into dhi first. */
> - emit_move_insn (dhi, hi);
> - hi = dhi;
> + invalidate hi's address. */
> + if (rtx_equal_p (dhi, lo))
> + {
> + /* We can't load into dhi first, so load into dlo
> + first and we'll swap. */
> + emit_move_insn (dlo, hi);
> + hi = dlo;
> + }
> + else
> + {
> + /* Load into dhi first. */
> + emit_move_insn (dhi, hi);
> + hi = dhi;
> + }
> }
> if (!rtx_equal_p (dlo, hi))
> {
> --- gcc/testsuite/gcc.target/i386/pr109109-1.c.jj 2023-03-14 15:51:35.104926863 +0100
> +++ gcc/testsuite/gcc.target/i386/pr109109-1.c 2023-03-14 15:51:16.715191961 +0100
> @@ -0,0 +1,139 @@
> +/* PR target/109109 */
> +/* { dg-do run { target ia32 } } */
> +/* { dg-options "-O2" } */
> +
> +unsigned int arr[64];
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f1 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) arr[ax]) << 32) | ax;
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f2 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) arr[dx]) << 32) | ax;
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f3 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | ax;
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f4 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) arr[ax]) << 32) | dx;
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f5 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) arr[dx]) << 32) | dx;
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f6 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | dx;
> +}
> +
> +__attribute__((noipa, regparm (3))) unsigned long long
> +f7 (unsigned int ax, unsigned int dx, unsigned int cx)
> +{
> + return (((unsigned long long) arr[ax]) << 32) | cx;
> +}
> +
> +__attribute__((noipa, regparm (3))) unsigned long long
> +f8 (unsigned int ax, unsigned int dx, unsigned int cx)
> +{
> + return (((unsigned long long) arr[dx]) << 32) | cx;
> +}
> +
> +__attribute__((noipa, regparm (3))) unsigned long long
> +f9 (unsigned int ax, unsigned int dx, unsigned int cx)
> +{
> + return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | cx;
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f10 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) ax) << 32) | arr[ax];
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f11 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) ax) << 32) | arr[dx];
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f12 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) ax) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f13 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) dx) << 32) | arr[ax];
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f14 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) dx) << 32) | arr[dx];
> +}
> +
> +__attribute__((noipa, regparm (2))) unsigned long long
> +f15 (unsigned int ax, unsigned int dx)
> +{
> + return (((unsigned long long) dx) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
> +}
> +
> +__attribute__((noipa, regparm (3))) unsigned long long
> +f16 (unsigned int ax, unsigned int dx, unsigned int cx)
> +{
> + return (((unsigned long long) cx) << 32) | arr[ax];
> +}
> +
> +__attribute__((noipa, regparm (3))) unsigned long long
> +f17 (unsigned int ax, unsigned int dx, unsigned int cx)
> +{
> + return (((unsigned long long) cx) << 32) | arr[dx];
> +}
> +
> +__attribute__((noipa, regparm (3))) unsigned long long
> +f18 (unsigned int ax, unsigned int dx, unsigned int cx)
> +{
> + return (((unsigned long long) cx) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
> +}
> +
> +int
> +main ()
> +{
> + for (int i = 0; i < 64; i++)
> + arr[i] = 64 + i;
> +#define CHECK_EQ(x, y) do { if (x != y) __builtin_abort (); } while (0)
> + CHECK_EQ (f1 (8, 9), 0x4800000008ULL);
> + CHECK_EQ (f2 (8, 9), 0x4900000008ULL);
> + CHECK_EQ (f3 (8, 9), 0x4b00000008ULL);
> + CHECK_EQ (f4 (8, 9), 0x4800000009ULL);
> + CHECK_EQ (f5 (8, 9), 0x4900000009ULL);
> + CHECK_EQ (f6 (8, 9), 0x4b00000009ULL);
> + CHECK_EQ (f7 (8, 9, 10), 0x480000000aULL);
> + CHECK_EQ (f8 (8, 9, 10), 0x490000000aULL);
> + CHECK_EQ (f9 (8, 9, 10), 0x4b0000000aULL);
> + CHECK_EQ (f10 (8, 9), 0x800000048ULL);
> + CHECK_EQ (f11 (8, 9), 0x800000049ULL);
> + CHECK_EQ (f12 (8, 9), 0x80000004bULL);
> + CHECK_EQ (f13 (8, 9), 0x900000048ULL);
> + CHECK_EQ (f14 (8, 9), 0x900000049ULL);
> + CHECK_EQ (f15 (8, 9), 0x90000004bULL);
> + CHECK_EQ (f16 (8, 9, 10), 0xa00000048ULL);
> + CHECK_EQ (f17 (8, 9, 10), 0xa00000049ULL);
> + CHECK_EQ (f18 (8, 9, 10), 0xa0000004bULL);
> +}
> --- gcc/testsuite/gcc.target/i386/pr109109-2.c.jj 2023-03-14 15:53:08.619578782 +0100
> +++ gcc/testsuite/gcc.target/i386/pr109109-2.c 2023-03-14 16:05:22.675995934 +0100
> @@ -0,0 +1,175 @@
> +/* PR target/109109 */
> +/* { dg-do run { target lp64 } } */
> +/* { dg-options "-O2" } */
> +
> +unsigned long arr[64];
> +
> +__attribute__((noipa)) unsigned __int128
> +f1 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) arr[ax]) << 64) | ax;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f2 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) arr[dx]) << 64) | ax;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f3 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | ax;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f4 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) arr[ax]) << 64) | dx;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f5 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) arr[dx]) << 64) | dx;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f6 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | dx;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f7 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) arr[ax]) << 64) | cx;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f8 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) arr[dx]) << 64) | cx;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f9 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | cx;
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f10 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) ax) << 64) | arr[ax];
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f11 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) ax) << 64) | arr[dx];
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f12 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) ax) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f13 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) dx) << 64) | arr[ax];
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f14 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) dx) << 64) | arr[dx];
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f15 (unsigned long di, unsigned long si, unsigned long dx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) dx) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f16 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) cx) << 64) | arr[ax];
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f17 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) cx) << 64) | arr[dx];
> +}
> +
> +__attribute__((noipa)) unsigned __int128
> +f18 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
> +{
> + unsigned long ax;
> + asm ("" : "=a" (ax) : "0" (di));
> + return (((unsigned __int128) cx) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
> +}
> +
> +int
> +main ()
> +{
> + for (int i = 0; i < 64; i++)
> + arr[i] = 64 + i;
> +#define CHECK_EQ(x, y1, y2) do { unsigned __int128 y = y1; y <<= 64; y += y2; if (x != y) __builtin_abort (); } while (0)
> + CHECK_EQ (f1 (8, 0, 9), 0x48, 0x8);
> + CHECK_EQ (f2 (8, 0, 9), 0x49, 0x8);
> + CHECK_EQ (f3 (8, 0, 9), 0x4a, 0x8);
> + CHECK_EQ (f4 (8, 0, 9), 0x48, 0x9);
> + CHECK_EQ (f5 (8, 0, 9), 0x49, 0x9);
> + CHECK_EQ (f6 (8, 0, 9), 0x4a, 0x9);
> + CHECK_EQ (f7 (8, 0, 9, 10), 0x48, 0xa);
> + CHECK_EQ (f8 (8, 0, 9, 10), 0x49, 0xa);
> + CHECK_EQ (f9 (8, 0, 9, 10), 0x4a, 0xa);
> + CHECK_EQ (f10 (8, 0, 9), 0x8, 0x48);
> + CHECK_EQ (f11 (8, 0, 9), 0x8, 0x49);
> + CHECK_EQ (f12 (8, 0, 9), 0x8, 0x4a);
> + CHECK_EQ (f13 (8, 0, 9), 0x9, 0x48);
> + CHECK_EQ (f14 (8, 0, 9), 0x9, 0x49);
> + CHECK_EQ (f15 (8, 0, 9), 0x9, 0x4a);
> + CHECK_EQ (f16 (8, 0, 9, 10), 0xa, 0x48);
> + CHECK_EQ (f17 (8, 0, 9, 10), 0xa, 0x49);
> + CHECK_EQ (f18 (8, 0, 9, 10), 0xa, 0x4a);
> +}
>
> Jakub
>
@@ -197,9 +197,20 @@ split_double_concat (machine_mode mode,
{
/* In this case, code below would first emit_move_insn (dlo, lo)
and then emit_move_insn (dhi, hi). But the former would
- invalidate hi's address. Load into dhi first. */
- emit_move_insn (dhi, hi);
- hi = dhi;
+ invalidate hi's address. */
+ if (rtx_equal_p (dhi, lo))
+ {
+ /* We can't load into dhi first, so load into dlo
+ first and we'll swap. */
+ emit_move_insn (dlo, hi);
+ hi = dlo;
+ }
+ else
+ {
+ /* Load into dhi first. */
+ emit_move_insn (dhi, hi);
+ hi = dhi;
+ }
}
if (!rtx_equal_p (dlo, hi))
{
@@ -0,0 +1,139 @@
+/* PR target/109109 */
+/* { dg-do run { target ia32 } } */
+/* { dg-options "-O2" } */
+
+unsigned int arr[64];
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f1 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) arr[ax]) << 32) | ax;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f2 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) arr[dx]) << 32) | ax;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f3 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | ax;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f4 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) arr[ax]) << 32) | dx;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f5 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) arr[dx]) << 32) | dx;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f6 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | dx;
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f7 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+ return (((unsigned long long) arr[ax]) << 32) | cx;
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f8 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+ return (((unsigned long long) arr[dx]) << 32) | cx;
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f9 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+ return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | cx;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f10 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) ax) << 32) | arr[ax];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f11 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) ax) << 32) | arr[dx];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f12 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) ax) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f13 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) dx) << 32) | arr[ax];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f14 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) dx) << 32) | arr[dx];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f15 (unsigned int ax, unsigned int dx)
+{
+ return (((unsigned long long) dx) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f16 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+ return (((unsigned long long) cx) << 32) | arr[ax];
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f17 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+ return (((unsigned long long) cx) << 32) | arr[dx];
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f18 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+ return (((unsigned long long) cx) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
+}
+
+int
+main ()
+{
+ for (int i = 0; i < 64; i++)
+ arr[i] = 64 + i;
+#define CHECK_EQ(x, y) do { if (x != y) __builtin_abort (); } while (0)
+ CHECK_EQ (f1 (8, 9), 0x4800000008ULL);
+ CHECK_EQ (f2 (8, 9), 0x4900000008ULL);
+ CHECK_EQ (f3 (8, 9), 0x4b00000008ULL);
+ CHECK_EQ (f4 (8, 9), 0x4800000009ULL);
+ CHECK_EQ (f5 (8, 9), 0x4900000009ULL);
+ CHECK_EQ (f6 (8, 9), 0x4b00000009ULL);
+ CHECK_EQ (f7 (8, 9, 10), 0x480000000aULL);
+ CHECK_EQ (f8 (8, 9, 10), 0x490000000aULL);
+ CHECK_EQ (f9 (8, 9, 10), 0x4b0000000aULL);
+ CHECK_EQ (f10 (8, 9), 0x800000048ULL);
+ CHECK_EQ (f11 (8, 9), 0x800000049ULL);
+ CHECK_EQ (f12 (8, 9), 0x80000004bULL);
+ CHECK_EQ (f13 (8, 9), 0x900000048ULL);
+ CHECK_EQ (f14 (8, 9), 0x900000049ULL);
+ CHECK_EQ (f15 (8, 9), 0x90000004bULL);
+ CHECK_EQ (f16 (8, 9, 10), 0xa00000048ULL);
+ CHECK_EQ (f17 (8, 9, 10), 0xa00000049ULL);
+ CHECK_EQ (f18 (8, 9, 10), 0xa0000004bULL);
+}
@@ -0,0 +1,175 @@
+/* PR target/109109 */
+/* { dg-do run { target lp64 } } */
+/* { dg-options "-O2" } */
+
+unsigned long arr[64];
+
+__attribute__((noipa)) unsigned __int128
+f1 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) arr[ax]) << 64) | ax;
+}
+
+__attribute__((noipa)) unsigned __int128
+f2 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) arr[dx]) << 64) | ax;
+}
+
+__attribute__((noipa)) unsigned __int128
+f3 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | ax;
+}
+
+__attribute__((noipa)) unsigned __int128
+f4 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) arr[ax]) << 64) | dx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f5 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) arr[dx]) << 64) | dx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f6 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | dx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f7 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) arr[ax]) << 64) | cx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f8 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) arr[dx]) << 64) | cx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f9 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | cx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f10 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) ax) << 64) | arr[ax];
+}
+
+__attribute__((noipa)) unsigned __int128
+f11 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) ax) << 64) | arr[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f12 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) ax) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f13 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) dx) << 64) | arr[ax];
+}
+
+__attribute__((noipa)) unsigned __int128
+f14 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) dx) << 64) | arr[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f15 (unsigned long di, unsigned long si, unsigned long dx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) dx) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f16 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) cx) << 64) | arr[ax];
+}
+
+__attribute__((noipa)) unsigned __int128
+f17 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) cx) << 64) | arr[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f18 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+ unsigned long ax;
+ asm ("" : "=a" (ax) : "0" (di));
+ return (((unsigned __int128) cx) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
+}
+
+int
+main ()
+{
+ for (int i = 0; i < 64; i++)
+ arr[i] = 64 + i;
+#define CHECK_EQ(x, y1, y2) do { unsigned __int128 y = y1; y <<= 64; y += y2; if (x != y) __builtin_abort (); } while (0)
+ CHECK_EQ (f1 (8, 0, 9), 0x48, 0x8);
+ CHECK_EQ (f2 (8, 0, 9), 0x49, 0x8);
+ CHECK_EQ (f3 (8, 0, 9), 0x4a, 0x8);
+ CHECK_EQ (f4 (8, 0, 9), 0x48, 0x9);
+ CHECK_EQ (f5 (8, 0, 9), 0x49, 0x9);
+ CHECK_EQ (f6 (8, 0, 9), 0x4a, 0x9);
+ CHECK_EQ (f7 (8, 0, 9, 10), 0x48, 0xa);
+ CHECK_EQ (f8 (8, 0, 9, 10), 0x49, 0xa);
+ CHECK_EQ (f9 (8, 0, 9, 10), 0x4a, 0xa);
+ CHECK_EQ (f10 (8, 0, 9), 0x8, 0x48);
+ CHECK_EQ (f11 (8, 0, 9), 0x8, 0x49);
+ CHECK_EQ (f12 (8, 0, 9), 0x8, 0x4a);
+ CHECK_EQ (f13 (8, 0, 9), 0x9, 0x48);
+ CHECK_EQ (f14 (8, 0, 9), 0x9, 0x49);
+ CHECK_EQ (f15 (8, 0, 9), 0x9, 0x4a);
+ CHECK_EQ (f16 (8, 0, 9, 10), 0xa, 0x48);
+ CHECK_EQ (f17 (8, 0, 9, 10), 0xa, 0x49);
+ CHECK_EQ (f18 (8, 0, 9, 10), 0xa, 0x4a);
+}