@@ -1222,48 +1222,50 @@
DONE;
})
-(define_insn_and_split "*mov<mode>_mem_to_mem"
+;; Some VLS modes (like V2SImode) are no wider than a general purpose
+;; register, so we optimize such a mem-to-mem move into a scalar
+;; mem-to-mem move. Otherwise, we always force operands[1] into a
+;; register so that we never get a mem-to-mem move after RA.
+(define_split
[(set (match_operand:VLS_AVL_IMM 0 "memory_operand")
(match_operand:VLS_AVL_IMM 1 "memory_operand"))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
+ "TARGET_VECTOR && can_create_pseudo_p ()
+ && GET_MODE_BITSIZE (<MODE>mode).to_constant () <= MAX_BITS_PER_WORD"
[(const_int 0)]
{
- if (GET_MODE_BITSIZE (<MODE>mode).to_constant () <= MAX_BITS_PER_WORD)
- {
- /* Opitmize the following case:
-
- typedef int8_t v2qi __attribute__ ((vector_size (2)));
- v2qi v = *(v2qi*)in;
- *(v2qi*)out = v;
-
- We prefer scalar load/store instead of vle.v/vse.v when
- the VLS modes size is smaller scalar mode. */
- machine_mode mode;
- unsigned size = GET_MODE_BITSIZE (<MODE>mode).to_constant ();
- if (FLOAT_MODE_P (<MODE>mode))
- mode = mode_for_size (size, MODE_FLOAT, 0).require ();
- else
- mode = mode_for_size (size, MODE_INT, 0).require ();
- emit_move_insn (gen_lowpart (mode, operands[0]),
- gen_lowpart (mode, operands[1]));
- }
+  /* Optimize the following case:
+
+     typedef int8_t v2qi __attribute__ ((vector_size (2)));
+     v2qi v = *(v2qi*)in;
+     *(v2qi*)out = v;
+
+     We prefer a scalar load/store instead of vle.v/vse.v when the
+     VLS mode is no larger than a scalar machine word.  */
+ machine_mode mode;
+ unsigned size = GET_MODE_BITSIZE (<MODE>mode).to_constant ();
+ if (FLOAT_MODE_P (<MODE>mode))
+ mode = mode_for_size (size, MODE_FLOAT, 0).require ();
else
- {
- operands[1] = force_reg (<MODE>mode, operands[1]);
- emit_move_insn (operands[0], operands[1]);
- }
+ mode = mode_for_size (size, MODE_INT, 0).require ();
+ emit_move_insn (gen_lowpart (mode, operands[0]),
+ gen_lowpart (mode, operands[1]));
DONE;
}
- [(set_attr "type" "vmov")]
)
+;; We recognize the mem-to-mem move in the pre-RA stage so that we
+;; don't ICE (unrecognizable insn: (set (mem) (mem))).  The previous
+;; mem-to-mem split pattern then forces operands[1] into a register,
+;; so a mem-to-mem move never survives past RA.
+;;
+;; We don't allow a mem-to-mem move in the post-RA stage since we
+;; don't have an instruction to split a mem-to-mem move after RA.
(define_insn_and_split "*mov<mode>"
[(set (match_operand:VLS_AVL_IMM 0 "reg_or_mem_operand" "=vr, m, vr")
(match_operand:VLS_AVL_IMM 1 "reg_or_mem_operand" " m,vr, vr"))]
"TARGET_VECTOR
- && (register_operand (operands[0], <MODE>mode)
+ && (can_create_pseudo_p ()
+ || register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
#
new file mode 100644
@@ -0,0 +1,31 @@
+! { dg-do compile }
+! { dg-options "-march=rv64gcv -mabi=lp64d -Ofast -fallow-argument-mismatch -fmax-stack-var-size=65536 -S -std=legacy -w" }
+
+module a
+ integer,parameter :: SHR_KIND_R8 = selected_real_kind(12)
+end module a
+module b
+ use a, c => shr_kind_r8
+contains
+ subroutine d(cg , km, i1, i2)
+ real (c) ch(i2,km)
+ real (c) cg(4,i1:i2,km)
+ real dc(i2,km)
+ real(c) ci(i2,km)
+ real(c) cj(i2,km)
+ do k=2,ck
+ do i=i1,0
+ cl = ci(i,k) *ci(i,1) / cj(i,k)+ch(i,1)
+ cm = cg(1,i,k) - min(e,cg(1,i,co))
+ dc(i,k) = sign(cm, cl)
+ enddo
+ enddo
+ if ( cq == 0 ) then
+ do i=i1,i2
+ if( cr <= cs ) then
+ cg= sign( min(ct, cg), cg)
+ endif
+ enddo
+ endif
+ end subroutine d
+end module b