middle-end/113622 - handle store with variable index to register

Message ID 20240129121209.87FD6385840B@sourceware.org
State Accepted
Headers
Series middle-end/113622 - handle store with variable index to register |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Richard Biener Jan. 29, 2024, 12:05 p.m. UTC
  The following implements storing to a non-MEM_P with a variable
offset.  We usually avoid this by forcing expansion to memory but
this doesn't work for hard register variables.  The solution is
to spill and operate on the stack.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

I realize the flow is a bit awkward, but short of duplicating a lot
of code I can't see a better way.  Forcing some lowering on GIMPLE
(creating the copy there) might be another away.  But then we
could possibly lower the whole vector indexing in a different way
in the first place ...

Thanks,
Richard.

	PR middle-end/113622
	* expr.cc (expand_assignment): Spill hard registers if
	we index them with a variable offset.

	* gcc.target/i386/pr113622-2.c: New testcase.
	* gcc.target/i386/pr113622-3.c: Likewise.
---
 gcc/expr.cc                                | 23 +++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr113622-2.c | 12 +++++++++++
 gcc/testsuite/gcc.target/i386/pr113622-3.c | 12 +++++++++++
 3 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113622-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113622-3.c
  

Patch

diff --git a/gcc/expr.cc b/gcc/expr.cc
index ee822c11dce..f13f07a2324 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -6061,6 +6061,7 @@  expand_assignment (tree to, tree from, bool nontemporal)
 	    to_rtx = adjust_address (to_rtx, BLKmode, 0);
 	}
  
+      rtx stemp = NULL_RTX, old_to_rtx = NULL_RTX;
       if (offset != 0)
 	{
 	  machine_mode address_mode;
@@ -6070,9 +6071,22 @@  expand_assignment (tree to, tree from, bool nontemporal)
 	    {
 	      /* We can get constant negative offsets into arrays with broken
 		 user code.  Translate this to a trap instead of ICEing.  */
-	      gcc_assert (TREE_CODE (offset) == INTEGER_CST);
-	      expand_builtin_trap ();
-	      to_rtx = gen_rtx_MEM (BLKmode, const0_rtx);
+	      if (TREE_CODE (offset) == INTEGER_CST)
+		{
+		  expand_builtin_trap ();
+		  to_rtx = gen_rtx_MEM (BLKmode, const0_rtx);
+		}
+	      /* Else spill for variable offset to the destination.  We expect
+		 to run into this only for hard registers.  */
+	      else
+		{
+		  gcc_assert (DECL_HARD_REGISTER (tem));
+		  stemp = assign_stack_temp (GET_MODE (to_rtx),
+					     GET_MODE_SIZE (GET_MODE (to_rtx)));
+		  emit_move_insn (stemp, to_rtx);
+		  old_to_rtx = to_rtx;
+		  to_rtx = stemp;
+		}
 	    }
 
 	  offset_rtx = expand_expr (offset, NULL_RTX, VOIDmode, EXPAND_SUM);
@@ -6305,6 +6319,9 @@  expand_assignment (tree to, tree from, bool nontemporal)
 				  bitregion_start, bitregion_end,
 				  mode1, from, get_alias_set (to),
 				  nontemporal, reversep);
+	  /* Move the temporary storage back to the non-MEM_P.  */
+	  if (stemp)
+	    emit_move_insn (old_to_rtx, stemp);
 	}
 
       if (result)
diff --git a/gcc/testsuite/gcc.target/i386/pr113622-2.c b/gcc/testsuite/gcc.target/i386/pr113622-2.c
new file mode 100644
index 00000000000..7bcc12af27e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113622-2.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-msse -w" } */
+
+typedef double __attribute__ ((vector_size (16))) vec;
+register vec a asm("xmm5"), b asm("xmm6"), c asm("xmm7");
+
+void
+test (void)
+{
+  for (int i = 0; i < 2; i++)
+    c[i] = a[i] < b[i] ? 0.1 : 0.2;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113622-3.c b/gcc/testsuite/gcc.target/i386/pr113622-3.c
new file mode 100644
index 00000000000..ca79d4ac901
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113622-3.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-msse" } */
+
+typedef double __attribute__ ((vector_size (16))) vec;
+
+void
+test (void)
+{
+  register vec a asm("xmm5"), b asm("xmm6"), c asm("xmm7");
+  for (int i = 0; i < 2; i++)
+    c[i] = a[i] < b[i] ? 0.1 : 0.2;
+}