target/109944 - avoid STLF fail for V16QImode CTOR expansion
Checks
Commit Message
The following dispatches to V2DImode CTOR expansion instead of
using sets of (subreg:DI (reg:V16QI 146) [08]), which cause
LRA to spill DImode and reload V16QImode. The same applies to
V8QImode or V4HImode construction from SImode parts, which happens
during the 32-bit libgcc build.
Bootstrapped and tested on x86_64-unknown-linux-gnu.
OK?
Thanks,
Richard.
PR target/109944
* config/i386/i386-expand.cc (ix86_expand_vector_init_general):
Perform final vector composition using
ix86_expand_vector_init_general instead of setting
the highpart and lowpart which causes spilling.
* gcc.target/i386/pr109944-1.c: New testcase.
* gcc.target/i386/pr109944-2.c: Likewise.
---
gcc/config/i386/i386-expand.cc | 11 ++++----
gcc/testsuite/gcc.target/i386/pr109944-1.c | 30 ++++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr109944-2.c | 17 ++++++++++++
3 files changed, 53 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-2.c
@@ -16367,11 +16367,12 @@ quarter:
emit_move_insn (target, gen_lowpart (mode, words[0]));
else if (n_words == 2)
{
- rtx tmp = gen_reg_rtx (mode);
- emit_clobber (tmp);
- emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]);
- emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]);
- emit_move_insn (target, tmp);
+ gcc_assert (tmp_mode == DImode || tmp_mode == SImode);
+ machine_mode concat_mode = tmp_mode == DImode ? V2DImode : V2SImode;
+ rtx tmp = gen_reg_rtx (concat_mode);
+ vals = gen_rtx_PARALLEL (concat_mode, gen_rtvec_v (2, words));
+ ix86_expand_vector_init_general (false, concat_mode, tmp, vals);
+ emit_move_insn (target, gen_lowpart (mode, tmp));
}
else if (n_words == 4)
{
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* Store 16 bytes, read from B at a stride of 16, into consecutive bytes of A.  */
+void foo (char * __restrict a, char *b)
+{
+ a[0] = b[0];
+ a[1] = b[16];
+ a[2] = b[32];
+ a[3] = b[48];
+ a[4] = b[64];
+ a[5] = b[80];
+ a[6] = b[96];
+ a[7] = b[112];
+ a[8] = b[128];
+ a[9] = b[144];
+ a[10] = b[160];
+ a[11] = b[176];
+ a[12] = b[192];
+ a[13] = b[208];
+ a[14] = b[224];
+ a[15] = b[240];
+}
+
+/* We do not want to generate a spill/reload when the store is vectorized, e.g.
+ movq %rdx, -24(%rsp)
+...
+ movq %rax, -16(%rsp)
+ movdqa -24(%rsp), %xmm0
+ movups %xmm0, (%rdi) */
+/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* Build a 16-byte vector from bytes of B read at a stride of 16.  */
+typedef char v16qi __attribute__((vector_size(16)));
+v16qi foo (char *b)
+{
+ return (v16qi){ b[0], b[16], b[32], b[48], b[64], b[80], b[96], b[112],
+ b[128], b[144], b[160], b[176], b[192], b[208], b[224], b[240] };
+}
+
+/* We do not want to generate a spill/reload sequence such as
+ movq %rdx, -24(%rsp)
+...
+ movq %rax, -16(%rsp)
+ movdqa -24(%rsp), %xmm0
+ movups %xmm0, (%rdi) */
+/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */