@@ -1433,9 +1433,23 @@ private:
inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
{
- return info.has_vl ()
- && (find_access (i->uses (), REGNO (info.get_vl ()))
- || find_access (i->defs (), REGNO (info.get_vl ())));
+ if (info.has_vl ())
+ {
+ if (find_access (i->defs (), REGNO (info.get_vl ())))
+ return true;
+ if (find_access (i->uses (), REGNO (info.get_vl ())))
+ {
+ resource_info resource = full_register (REGNO (info.get_vl ()));
+ def_lookup dl1 = crtl->ssa->find_def (resource, i);
+ def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
+ if (dl1.matching_set () || dl2.matching_set ())
+ return true;
+ /* If their VLs are coming from same def, we still want to fuse
+ their VSETVL demand info to gain better performance. */
+ return dl1.prev_def (i) != dl2.prev_def (i);
+ }
+ }
+ return false;
}
inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
{
@@ -1702,7 +1716,7 @@ public:
for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
i = i->prev_nondebug_insn ())
{
- // no def amd use of vl
+ // no def and use of vl
if (!ignore_vl && modify_or_use_vl_p (i, info))
return false;
@@ -2635,11 +2649,8 @@ pre_vsetvl::compute_lcm_local_properties ()
for (const insn_info *insn : bb->real_nondebug_insns ())
{
- if ((info.has_nonvlmax_reg_avl ()
- && find_access (insn->defs (), REGNO (info.get_avl ())))
- || (info.has_vl ()
- && find_access (insn->uses (),
- REGNO (info.get_vl ()))))
+ if (info.has_nonvlmax_reg_avl ()
+ && find_access (insn->defs (), REGNO (info.get_avl ())))
{
bitmap_clear_bit (m_transp[bb_index], i);
break;
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t
+foo (char const *buf, size_t len)
+{
+ size_t sum = 0;
+ size_t vl = __riscv_vsetvlmax_e8m8();
+ size_t step = vl * 4;
+ const char *it = buf, *end = buf + len;
+ for(; it + step <= end; ) {
+ it += vl;
+ vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+ sum += __riscv_vcpop_m_b1(m3, vl);
+ }
+ return sum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+static size_t
+utf8_count_rvv(char const *buf, size_t len)
+{
+ size_t sum = 0;
+ for (size_t vl; len > 0; len -= vl, buf += vl) {
+ vl = __riscv_vsetvl_e8m8(len);
+ vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl);
+ vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl);
+ sum += __riscv_vcpop_m_b1(mask, vl);
+ }
+ return sum;
+}
+
+size_t
+utf8_count_rvv_4x_tail(char const *buf, size_t len)
+{
+ size_t sum = 0;
+ size_t vl = __riscv_vsetvlmax_e8m8();
+ size_t step = vl * 4;
+ const char *it = buf, *end = buf + len;
+ for(; it + step <= end; ) {
+ vint8m8_t v0 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v1 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v2 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vbool1_t m0 = __riscv_vmsgt_vx_i8m8_b1(v0, -65, vl);
+ vbool1_t m1 = __riscv_vmsgt_vx_i8m8_b1(v1, -65, vl);
+ vbool1_t m2 = __riscv_vmsgt_vx_i8m8_b1(v2, -65, vl);
+ vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+ sum += __riscv_vcpop_m_b1(m0, vl);
+ sum += __riscv_vcpop_m_b1(m1, vl);
+ sum += __riscv_vcpop_m_b1(m2, vl);
+ sum += __riscv_vcpop_m_b1(m3, vl);
+ }
+ return sum + utf8_count_rvv(it, end - it);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */