RISC-V: Fix VSETVL PASS regression

Message ID 20231127132412.2440640-1-juzhe.zhong@rivai.ai
State Unresolved
Headers
Series RISC-V: Fix VSETVL PASS regression |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

juzhe.zhong@rivai.ai Nov. 27, 2023, 1:24 p.m. UTC
  This patch is regression fix patch, not an optimization patch.
Since trunk GCC generates redundant vsetvl than GCC-13.

This is the case:

bb 2:
  def a2 (vsetvl a2, zero)
bb 3:
  use a2
bb 4:
  use a2 (vle)

before this patch:

bb 2:
vsetvl a2 zero
bb 3:
vsetvl zero, zero ----> should be eliminated.
bb 4:
vle.v

The root cause is we didn't set bb 3 as transparent since the incorrect codes.
bb 3 didn't modify "a2" just use it, the VSETVL status from bb 2 can be available to bb 3 and bb 4:

bb 2 -> bb 3 -> bb4.

Another regression fix is anticipation calculation:

bb 4:
use a5 (sub)
use a5 (vle)

The vle VSETVL status should be considered as anticipated as long as both sub and vle a5 def are coming from same def.

Tested on zvl128b no regression.

I am going to test on zvl256/zvl512/zvl1024

	PR target/112713

gcc/ChangeLog:

	* config/riscv/riscv-vsetvl.cc (pre_vsetvl::compute_lcm_local_properties): Fix regression.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/vsetvl/pr112713-1.c: New test.
	* gcc.target/riscv/rvv/vsetvl/pr112713-2.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc              | 29 ++++++++----
 .../gcc.target/riscv/rvv/vsetvl/pr112713-1.c  | 24 ++++++++++
 .../gcc.target/riscv/rvv/vsetvl/pr112713-2.c  | 47 +++++++++++++++++++
 3 files changed, 91 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
  

Patch

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 74367ec8d8e..b3e07d4c3aa 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1433,9 +1433,23 @@  private:
 
   inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
   {
-    return info.has_vl ()
-	   && (find_access (i->uses (), REGNO (info.get_vl ()))
-	       || find_access (i->defs (), REGNO (info.get_vl ())));
+    if (info.has_vl ())
+      {
+	if (find_access (i->defs (), REGNO (info.get_vl ())))
+	  return true;
+	if (find_access (i->uses (), REGNO (info.get_vl ())))
+	  {
+	    resource_info resource = full_register (REGNO (info.get_vl ()));
+	    def_lookup dl1 = crtl->ssa->find_def (resource, i);
+	    def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
+	    if (dl1.matching_set () || dl2.matching_set ())
+	      return true;
+	    /* If their VLs are coming from same def, we still want to fuse
+	       their VSETVL demand info to gain better performance.  */
+	    return dl1.prev_def (i) != dl2.prev_def (i);
+	  }
+      }
+    return false;
   }
   inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
   {
@@ -1702,7 +1716,7 @@  public:
 	for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
 	     i = i->prev_nondebug_insn ())
 	  {
-	    // no def amd use of vl
+	    // no def and use of vl
 	    if (!ignore_vl && modify_or_use_vl_p (i, info))
 	      return false;
 
@@ -2635,11 +2649,8 @@  pre_vsetvl::compute_lcm_local_properties ()
 
 	      for (const insn_info *insn : bb->real_nondebug_insns ())
 		{
-		  if ((info.has_nonvlmax_reg_avl ()
-		       && find_access (insn->defs (), REGNO (info.get_avl ())))
-		      || (info.has_vl ()
-			  && find_access (insn->uses (),
-					  REGNO (info.get_vl ()))))
+		  if (info.has_nonvlmax_reg_avl ()
+		      && find_access (insn->defs (), REGNO (info.get_avl ())))
 		    {
 		      bitmap_clear_bit (m_transp[bb_index], i);
 		      break;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
new file mode 100644
index 00000000000..76402ab6167
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t
+foo (char const *buf, size_t len)
+{
+	size_t sum = 0;
+	size_t vl = __riscv_vsetvlmax_e8m8();
+	size_t step = vl * 4;
+	const char *it = buf, *end = buf + len;
+	for(; it + step <= end; ) {
+		it += vl;
+		vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+		sum += __riscv_vcpop_m_b1(m3, vl);
+	}
+	return sum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
new file mode 100644
index 00000000000..04539d998cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+static size_t
+utf8_count_rvv(char const *buf, size_t len)
+{
+	size_t sum = 0;
+	for (size_t vl; len > 0; len -= vl, buf += vl) {
+		vl = __riscv_vsetvl_e8m8(len);
+		vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl);
+		vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl);
+		sum += __riscv_vcpop_m_b1(mask, vl);
+	}
+	return sum;
+}
+
+size_t
+utf8_count_rvv_4x_tail(char const *buf, size_t len)
+{
+	size_t sum = 0;
+	size_t vl = __riscv_vsetvlmax_e8m8();
+	size_t step = vl * 4;
+	const char *it = buf, *end = buf + len;
+	for(; it + step <= end; ) {
+		vint8m8_t v0 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vint8m8_t v1 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vint8m8_t v2 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vbool1_t m0 = __riscv_vmsgt_vx_i8m8_b1(v0, -65, vl);
+		vbool1_t m1 = __riscv_vmsgt_vx_i8m8_b1(v1, -65, vl);
+		vbool1_t m2 = __riscv_vmsgt_vx_i8m8_b1(v2, -65, vl);
+		vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+		sum += __riscv_vcpop_m_b1(m0, vl);
+		sum += __riscv_vcpop_m_b1(m1, vl);
+		sum += __riscv_vcpop_m_b1(m2, vl);
+		sum += __riscv_vcpop_m_b1(m3, vl);
+	}
+	return sum + utf8_count_rvv(it, end - it);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */