RISC-V: Fixed failed rvv combine testcases

Message ID 20231107074933.4025916-1-lehua.ding@rivai.ai
State Unresolved
Headers
Series RISC-V: Fixed failed rvv combine testcases |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Lehua Ding Nov. 7, 2023, 7:49 a.m. UTC
  Hi,

This patch fixes the following failing testcases on the trunk:
FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvfwredusum\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 2
...
FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvwredsumu\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 3
...

These testcases fail because .VCOND_MASK_LEN was introduced in the midend
for another bugfix, which in turn leads to a new vcond_mask_len rtl pattern
after expand. So we need to add new combine patterns to handle this case.

Consider this code:

int16_t foo (int8_t *restrict a, int8_t *restrict pred)
{
  int16_t sum = 0;
  for (int i = 0; i < 16; i += 1)
    if (pred[i])
      sum += a[i];
  return sum;
}

Before this patch:
foo:
        vsetivli        zero,16,e8,m1,ta,ma
        vle8.v  v0,0(a1)
        vsetvli a5,zero,e8,m1,ta,ma
        vmsne.vi        v0,v0,0
        vsetvli zero,zero,e16,m2,ta,ma
        li      a3,0
        vmv.v.i v2,0
        vsetivli        zero,16,e16,m2,ta,ma
        vle8.v  v6,0(a0),v0.t
        vmv.s.x v1,a3
        vsetvli a5,zero,e16,m2,ta,ma
        vsext.vf2       v4,v6
        vsetivli        zero,16,e16,m2,tu,ma
        vmerge.vvm      v2,v2,v4,v0
        vsetvli a5,zero,e16,m2,ta,ma
        vredsum.vs      v2,v2,v1
        vmv.x.s a0,v2
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret

After this patch:
foo:
	vsetivli	zero,16,e16,m2,ta,ma
	li	a5,0
	vle8.v	v0,0(a1)
	vmv.s.x	v1,a5
	vsetvli	zero,zero,e8,m1,ta,ma
	vmsne.vi	v0,v0,0
	vle8.v	v2,0(a0),v0.t
	vwredsum.vs	v1,v2,v1,v0.t
	vsetvli	zero,zero,e16,m1,ta,ma
	vmv.x.s	a0,v1
	slliw	a0,a0,16
	sraiw	a0,a0,16
	ret

The vsext.vf2, vmerge.vvm, and vredsum.vs instructions are combined into a
single masked vwredsum.vs, which also removes the corresponding vsetvl
instructions.

gcc/ChangeLog:

	* config/riscv/autovec-opt.md (*cond_len_<optab><v_double_trunc><mode>):
	New combine pattern.
	(*cond_len_<optab><v_quad_trunc><mode>): Ditto.
	(*cond_len_<optab><v_oct_trunc><mode>): Ditto.
	(*cond_len_extend<v_double_trunc><mode>): Ditto.
	(*cond_len_widen_reduc_plus_scal_<mode>): Ditto.

gcc/testsuite/ChangeLog:

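	* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c: Add vf4, vf8
	and _Float16 to double cases and update the expected counts.
	* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c: Include
	cond_widen_reduc-1.c instead of duplicating it and update the
	expected counts.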

---
 gcc/config/riscv/autovec-opt.md               | 214 ++++++++++++++++++
 .../rvv/autovec/cond/cond_widen_reduc-1.c     |  13 +-
 .../rvv/autovec/cond/cond_widen_reduc-2.c     |  30 +--
 3 files changed, 232 insertions(+), 25 deletions(-)
  

Comments

juzhe.zhong@rivai.ai Nov. 7, 2023, 7:51 a.m. UTC | #1
LGTM. Thanks for fixing it.



juzhe.zhong@rivai.ai
 
From: Lehua Ding
Date: 2023-11-07 15:49
To: gcc-patches
CC: juzhe.zhong; kito.cheng; rdapp.gcc; palmer; jeffreyalaw; lehua.ding
Subject: [PATCH] RISC-V: Fixed failed rvv combine testcases
Hi,
 
This patch fixes the following failing testcases on the trunk:
FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvfwredusum\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 2
...
FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvwredsumu\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 3
...
 
These testcases fail because .VCOND_MASK_LEN was introduced in the midend
for another bugfix, which in turn leads to a new vcond_mask_len rtl pattern
after expand. So we need to add new combine patterns to handle this case.
 
Consider this code:
 
int16_t foo (int8_t *restrict a, int8_t *restrict pred)
{
  int16_t sum = 0;
  for (int i = 0; i < 16; i += 1)
    if (pred[i])
      sum += a[i];
  return sum;
}
 
Before this patch:
foo:
        vsetivli        zero,16,e8,m1,ta,ma
        vle8.v  v0,0(a1)
        vsetvli a5,zero,e8,m1,ta,ma
        vmsne.vi        v0,v0,0
        vsetvli zero,zero,e16,m2,ta,ma
        li      a3,0
        vmv.v.i v2,0
        vsetivli        zero,16,e16,m2,ta,ma
        vle8.v  v6,0(a0),v0.t
        vmv.s.x v1,a3
        vsetvli a5,zero,e16,m2,ta,ma
        vsext.vf2       v4,v6
        vsetivli        zero,16,e16,m2,tu,ma
        vmerge.vvm      v2,v2,v4,v0
        vsetvli a5,zero,e16,m2,ta,ma
        vredsum.vs      v2,v2,v1
        vmv.x.s a0,v2
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret
 
After this patch:
foo:
vsetivli zero,16,e16,m2,ta,ma
li a5,0
vle8.v v0,0(a1)
vmv.s.x v1,a5
vsetvli zero,zero,e8,m1,ta,ma
vmsne.vi v0,v0,0
vle8.v v2,0(a0),v0.t
vwredsum.vs v1,v2,v1,v0.t
vsetvli zero,zero,e16,m1,ta,ma
vmv.x.s a0,v1
slliw a0,a0,16
sraiw a0,a0,16
ret
 
Combine the vsext.vf2, vmerge.vvm, and vredsum.vs instructions while
reducing the corresponding vsetvl instructions.
 
gcc/ChangeLog:
 
* config/riscv/autovec-opt.md (*cond_len_<optab><v_double_trunc><mode>):
New combine pattern.
(*cond_len_<optab><v_quad_trunc><mode>): Ditto.
(*cond_len_<optab><v_oct_trunc><mode>): Ditto.
(*cond_len_extend<v_double_trunc><mode>): Ditto.
(*cond_len_widen_reduc_plus_scal_<mode>): Ditto.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c:
* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c:
 
---
gcc/config/riscv/autovec-opt.md               | 214 ++++++++++++++++++
.../rvv/autovec/cond/cond_widen_reduc-1.c     |  13 +-
.../rvv/autovec/cond/cond_widen_reduc-2.c     |  30 +--
3 files changed, 232 insertions(+), 25 deletions(-)
 
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index d0f8b3cde4e..3c87e66ea49 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -194,6 +194,84 @@
}
[(set_attr "type" "vector")])
+;; Combine sign_extend/zero_extend(vf2) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_double_trunc><mode>"
+  [(set (match_operand:VWEXTI 0 "register_operand")
+    (if_then_else:VWEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTI
+        (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTI 1 "vector_merge_operand")
+ (match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf2 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf4) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_quad_trunc><mode>"
+  [(set (match_operand:VQEXTI 0 "register_operand")
+    (if_then_else:VQEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VQEXTI
+        (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 2 "register_operand"))
+        (match_operand:VQEXTI 1 "vector_merge_operand")
+ (match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf4 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf8) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_oct_trunc><mode>"
+  [(set (match_operand:VOEXTI 0 "register_operand")
+    (if_then_else:VOEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VOEXTI
+        (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 2 "register_operand"))
+        (match_operand:VOEXTI 1 "vector_merge_operand")
+ (match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf8 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
;; Combine trunc(vf2) + vcond_mask
(define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -235,6 +313,32 @@
}
[(set_attr "type" "vector")])
+;; Combine FP extend(vf2) and vcond_mask_len
+(define_insn_and_split "*cond_len_extend<v_double_trunc><mode>"
+  [(set (match_operand:VWEXTF_ZVFHMIN 0 "register_operand")
+    (if_then_else:VWEXTF_ZVFHMIN
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTF_ZVFHMIN
+        (float_extend:VWEXTF_ZVFHMIN (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTF_ZVFHMIN 1 "vector_merge_operand")
+ (match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_extend<mode> (operands[0], operands[3], operands[1], operands[2],
+                                    operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
;; Combine FP trunc(vf2) + vcond_mask
(define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -1151,6 +1255,61 @@
}
[(set_attr "type" "vector")])
+;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
+;; where the merge of mask_len_extend is vector const 0
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (any_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VI_QHS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+                                  riscv_vector::REDUCE_OP_M,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
;; Combine mask_extend + vfredsum to mask_vfwredusum
;; where the mrege of mask_extend is vector const 0
(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
@@ -1187,6 +1346,61 @@
}
[(set_attr "type" "vector")])
+;; Combine mask_len_extend + vfredsum to mask_vfwredusum
+;; where the merge of mask_len_extend is vector const 0
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (float_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VF_HS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM_UNORDERED))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
;; =============================================================================
;; Misc combine patterns
;; =============================================================================
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
index 22a71048684..47889f3a1cd 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
@@ -15,16 +15,27 @@
#define TEST_ALL(TEST)                                                         \
   TEST (int16_t, int8_t, 16)                                                   \
+  TEST (int32_t, int8_t, 8)                                                    \
   TEST (int32_t, int16_t, 8)                                                   \
+  TEST (int64_t, int8_t, 4)                                                    \
+  TEST (int64_t, int16_t, 4)                                                   \
   TEST (int64_t, int32_t, 4)                                                   \
   TEST (uint16_t, uint8_t, 16)                                                 \
+  TEST (uint32_t, uint8_t, 8)                                                  \
   TEST (uint32_t, uint16_t, 8)                                                 \
+  TEST (uint64_t, uint8_t, 4)                                                  \
+  TEST (uint64_t, uint16_t, 4)                                                 \
   TEST (uint64_t, uint32_t, 4)                                                 \
   TEST (float, _Float16, 8)                                                    \
+  TEST (double, _Float16, 4)                                                   \
   TEST (double, float, 4)
TEST_ALL (TEST_TYPE)
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
/* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
/* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
index 7c8fedd072b..662d1351215 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
@@ -1,30 +1,12 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
-#include <stdint-gcc.h>
-#define TEST_TYPE(TYPE1, TYPE2, N)                                             \
-  __attribute__ ((noipa))                                                      \
-  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      \
-  {                                                                            \
-    TYPE1 sum = 0;                                                             \
-    for (int i = 0; i < N; i += 1)                                             \
-      if (pred[i])                                                             \
- sum += a[i];                                                           \
-    return sum;                                                                \
-  }
+#include "cond_widen_reduc-1.c"
-#define TEST_ALL(TEST)                                                         \
-  TEST (int16_t, int8_t, 16)                                                   \
-  TEST (int32_t, int16_t, 8)                                                   \
-  TEST (int64_t, int32_t, 4)                                                   \
-  TEST (uint16_t, uint8_t, 16)                                                 \
-  TEST (uint32_t, uint16_t, 8)                                                 \
-  TEST (uint64_t, uint32_t, 4)                                                 \
-  TEST (float, _Float16, 8)                                                    \
-  TEST (double, float, 4)
-
-TEST_ALL (TEST_TYPE)
-
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
/* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
/* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
-- 
2.36.3
  
Lehua Ding Nov. 7, 2023, 7:54 a.m. UTC | #2
Committed, thanks Juzhe.

On 2023/11/7 15:51, juzhe.zhong@rivai.ai wrote:
> LGTM. Thanks for fixing it.
> 
> ------------------------------------------------------------------------
> juzhe.zhong@rivai.ai
> 
>     *From:* Lehua Ding <mailto:lehua.ding@rivai.ai>
>     *Date:* 2023-11-07 15:49
>     *To:* gcc-patches <mailto:gcc-patches@gcc.gnu.org>
>     *CC:* juzhe.zhong <mailto:juzhe.zhong@rivai.ai>; kito.cheng
>     <mailto:kito.cheng@gmail.com>; rdapp.gcc
>     <mailto:rdapp.gcc@gmail.com>; palmer <mailto:palmer@rivosinc.com>;
>     jeffreyalaw <mailto:jeffreyalaw@gmail.com>; lehua.ding
>     <mailto:lehua.ding@rivai.ai>
>     *Subject:* [PATCH] RISC-V: Fixed failed rvv combine testcases
>     Hi,
>     This patch fixed the fellowing failed testcases on the trunk:
>     FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
>     scan-assembler-times
>     \\tvfwredusum\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 2
>     ...
>     FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
>     scan-assembler-times
>     \\tvwredsumu\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 3
>     ...
>     The reason for these failed testcases is the introduce of
>     .VCOND_MASK_LEN
>     in midend for other bugfix and further leads to a new vcond_mask_len rtl
>     pattern after expand. So we need add new combine patterns handle
>     this case.
>     Consider this code:
>     int16_t foo (int8_t *restrict a, int8_t *restrict pred)
>     {
>        int16_t sum = 0;
>        for (int i = 0; i < 16; i += 1)
>          if (pred[i])
>            sum += a[i];
>        return sum;
>     }
>     Before this patch:
>     foo:
>              vsetivli        zero,16,e8,m1,ta,ma
>              vle8.v  v0,0(a1)
>              vsetvli a5,zero,e8,m1,ta,ma
>              vmsne.vi        v0,v0,0
>              vsetvli zero,zero,e16,m2,ta,ma
>              li      a3,0
>              vmv.v.i v2,0
>              vsetivli        zero,16,e16,m2,ta,ma
>              vle8.v  v6,0(a0),v0.t
>              vmv.s.x v1,a3
>              vsetvli a5,zero,e16,m2,ta,ma
>              vsext.vf2       v4,v6
>              vsetivli        zero,16,e16,m2,tu,ma
>              vmerge.vvm      v2,v2,v4,v0
>              vsetvli a5,zero,e16,m2,ta,ma
>              vredsum.vs      v2,v2,v1
>              vmv.x.s a0,v2
>              slliw   a0,a0,16
>              sraiw   a0,a0,16
>              ret
>     After this patch:
>     foo:
>     vsetivli zero,16,e16,m2,ta,ma
>     li a5,0
>     vle8.v v0,0(a1)
>     vmv.s.x v1,a5
>     vsetvli zero,zero,e8,m1,ta,ma
>     vmsne.vi v0,v0,0
>     vle8.v v2,0(a0),v0.t
>     vwredsum.vs v1,v2,v1,v0.t
>     vsetvli zero,zero,e16,m1,ta,ma
>     vmv.x.s a0,v1
>     slliw a0,a0,16
>     sraiw a0,a0,16
>     ret
>     Combine the vsext.vf2, vmerge.vvm, and vredsum.vs instructions while
>     reducing the corresponding vsetvl instructions.
>     gcc/ChangeLog:
>     * config/riscv/autovec-opt.md (*cond_len_<optab><v_double_trunc><mode>):
>     New combine pattern.
>     (*cond_len_<optab><v_quad_trunc><mode>): Ditto.
>     (*cond_len_<optab><v_oct_trunc><mode>): Ditto.
>     (*cond_len_extend<v_double_trunc><mode>): Ditto.
>     (*cond_len_widen_reduc_plus_scal_<mode>): Ditto.
>     gcc/testsuite/ChangeLog:
>     * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c:
>     * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c:
>     ---
>     gcc/config/riscv/autovec-opt.md               | 214 ++++++++++++++++++
>     .../rvv/autovec/cond/cond_widen_reduc-1.c     |  13 +-
>     .../rvv/autovec/cond/cond_widen_reduc-2.c     |  30 +--
>     3 files changed, 232 insertions(+), 25 deletions(-)
>     diff --git a/gcc/config/riscv/autovec-opt.md
>     b/gcc/config/riscv/autovec-opt.md
>     index d0f8b3cde4e..3c87e66ea49 100644
>     --- a/gcc/config/riscv/autovec-opt.md
>     +++ b/gcc/config/riscv/autovec-opt.md
>     @@ -194,6 +194,84 @@
>     }
>     [(set_attr "type" "vector")])
>     +;; Combine sign_extend/zero_extend(vf2) and vcond_mask_len
>     +(define_insn_and_split "*cond_len_<optab><v_double_trunc><mode>"
>     +  [(set (match_operand:VWEXTI 0 "register_operand")
>     +    (if_then_else:VWEXTI
>     +      (unspec:<VM>
>     +        [(match_operand 4 "vector_length_operand")
>     +         (match_operand 5 "const_int_operand")
>     +         (match_operand 6 "const_int_operand")
>     +         (reg:SI VL_REGNUM)
>     +         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>     +      (vec_merge:VWEXTI
>     +        (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2
>     "register_operand"))
>     +        (match_operand:VWEXTI 1 "vector_merge_operand")
>     + (match_operand:<VM> 3 "register_operand"))
>     +      (match_dup 1)))]
>     +  "TARGET_VECTOR"
>     +  "#"
>     +  "&& 1"
>     +  [(const_int 0)]
>     +{
>     +  emit_insn (gen_pred_<optab><mode>_vf2 (operands[0], operands[3],
>     operands[1], operands[2],
>     +                                         operands[4], operands[5],
>     operands[6], CONST0_RTX (Pmode)));
>     +  DONE;
>     +}
>     +[(set_attr "type" "vector")])
>     +
>     +;; Combine sign_extend/zero_extend(vf4) and vcond_mask_len
>     +(define_insn_and_split "*cond_len_<optab><v_quad_trunc><mode>"
>     +  [(set (match_operand:VQEXTI 0 "register_operand")
>     +    (if_then_else:VQEXTI
>     +      (unspec:<VM>
>     +        [(match_operand 4 "vector_length_operand")
>     +         (match_operand 5 "const_int_operand")
>     +         (match_operand 6 "const_int_operand")
>     +         (reg:SI VL_REGNUM)
>     +         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>     +      (vec_merge:VQEXTI
>     +        (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 2
>     "register_operand"))
>     +        (match_operand:VQEXTI 1 "vector_merge_operand")
>     + (match_operand:<VM> 3 "register_operand"))
>     +      (match_dup 1)))]
>     +  "TARGET_VECTOR"
>     +  "#"
>     +  "&& 1"
>     +  [(const_int 0)]
>     +{
>     +  emit_insn (gen_pred_<optab><mode>_vf4 (operands[0], operands[3],
>     operands[1], operands[2],
>     +                                         operands[4], operands[5],
>     operands[6], CONST0_RTX (Pmode)));
>     +  DONE;
>     +}
>     +[(set_attr "type" "vector")])
>     +
>     +;; Combine sign_extend/zero_extend(vf8) and vcond_mask_len
>     +(define_insn_and_split "*cond_len_<optab><v_oct_trunc><mode>"
>     +  [(set (match_operand:VOEXTI 0 "register_operand")
>     +    (if_then_else:VOEXTI
>     +      (unspec:<VM>
>     +        [(match_operand 4 "vector_length_operand")
>     +         (match_operand 5 "const_int_operand")
>     +         (match_operand 6 "const_int_operand")
>     +         (reg:SI VL_REGNUM)
>     +         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>     +      (vec_merge:VOEXTI
>     +        (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 2
>     "register_operand"))
>     +        (match_operand:VOEXTI 1 "vector_merge_operand")
>     + (match_operand:<VM> 3 "register_operand"))
>     +      (match_dup 1)))]
>     +  "TARGET_VECTOR"
>     +  "#"
>     +  "&& 1"
>     +  [(const_int 0)]
>     +{
>     +  emit_insn (gen_pred_<optab><mode>_vf8 (operands[0], operands[3],
>     operands[1], operands[2],
>     +                                         operands[4], operands[5],
>     operands[6], CONST0_RTX (Pmode)));
>     +  DONE;
>     +}
>     +[(set_attr "type" "vector")])
>     +
>     ;; Combine trunc(vf2) + vcond_mask
>     (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
>         [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
>     @@ -235,6 +313,32 @@
>     }
>     [(set_attr "type" "vector")])
>     +;; Combine FP extend(vf2) and vcond_mask_len
>     +(define_insn_and_split "*cond_len_extend<v_double_trunc><mode>"
>     +  [(set (match_operand:VWEXTF_ZVFHMIN 0 "register_operand")
>     +    (if_then_else:VWEXTF_ZVFHMIN
>     +      (unspec:<VM>
>     +        [(match_operand 4 "vector_length_operand")
>     +         (match_operand 5 "const_int_operand")
>     +         (match_operand 6 "const_int_operand")
>     +         (reg:SI VL_REGNUM)
>     +         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>     +      (vec_merge:VWEXTF_ZVFHMIN
>     +        (float_extend:VWEXTF_ZVFHMIN
>     (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
>     +        (match_operand:VWEXTF_ZVFHMIN 1 "vector_merge_operand")
>     + (match_operand:<VM> 3 "register_operand"))
>     +      (match_dup 1)))]
>     +  "TARGET_VECTOR"
>     +  "#"
>     +  "&& 1"
>     +  [(const_int 0)]
>     +{
>     +  emit_insn (gen_pred_extend<mode> (operands[0], operands[3],
>     operands[1], operands[2],
>     +                                    operands[4], operands[5],
>     operands[6], CONST0_RTX (Pmode)));
>     +  DONE;
>     +}
>     +[(set_attr "type" "vector")])
>     +
>     ;; Combine FP trunc(vf2) + vcond_mask
>     (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
>         [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
>     @@ -1151,6 +1255,61 @@
>     }
>     [(set_attr "type" "vector")])
>     +;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
>     +;; where the mrege of mask_len_extend is vector const 0
>     +(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
>     +  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
>     +        (unspec:<V_DOUBLE_EXTEND_VEL> [
>     +          (if_then_else:<V_DOUBLE_EXTEND>
>     +            (unspec:<VM> [
>     +              (match_operand 2 "vector_length_operand")
>     +              (const_int 0)
>     +              (const_int 0)
>     +              (reg:SI VL_REGNUM)
>     +              (reg:SI VTYPE_REGNUM)
>     +            ] UNSPEC_VPREDICATE)
>     +            (vec_merge:<V_DOUBLE_EXTEND>
>     +              (any_extend:<V_DOUBLE_EXTEND>
>     +                (match_operand:VI_QHS_NO_M8 3 "register_operand"))
>     +              (if_then_else:<V_DOUBLE_EXTEND>
>     +                (unspec:<VM> [
>     +                  (match_operand:<VM> 4
>     "vector_all_trues_mask_operand")
>     +                  (match_operand 5 "vector_length_operand")
>     +                  (match_operand 6 "const_int_operand")
>     +                  (match_operand 7 "const_int_operand")
>     +                  (match_operand 8 "const_1_or_2_operand")
>     +                  (reg:SI VL_REGNUM)
>     +                  (reg:SI VTYPE_REGNUM)
>     +                ] UNSPEC_VPREDICATE)
>     +                (match_operand:<V_DOUBLE_EXTEND> 9
>     "vector_const_0_operand")
>     +                (match_operand:<V_DOUBLE_EXTEND> 10
>     "vector_merge_operand"))
>     +              (match_operand:<VM> 1 "register_operand"))
>     +            (if_then_else:<V_DOUBLE_EXTEND>
>     +              (unspec:<VM> [
>     +                (match_dup 4)
>     +                (match_dup 5)
>     +                (match_dup 6)
>     +                (match_dup 7)
>     +                (match_dup 8)
>     +                (reg:SI VL_REGNUM)
>     +                (reg:SI VTYPE_REGNUM)
>     +              ] UNSPEC_VPREDICATE)
>     +              (match_dup 9)
>     +              (match_dup 10)))
>     +        ] UNSPEC_REDUC_SUM))]
>     +  "TARGET_VECTOR && can_create_pseudo_p ()"
>     +  "#"
>     +  "&& 1"
>     +  [(const_int 0)]
>     +{
>     +  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
>     +  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
>     +                                  riscv_vector::REDUCE_OP_M,
>     +                                  ops, CONST0_RTX
>     (<V_DOUBLE_EXTEND_VEL>mode));
>     +  DONE;
>     +}
>     +[(set_attr "type" "vector")])
>     +
>     ;; Combine mask_extend + vfredsum to mask_vfwredusum
>     ;; where the mrege of mask_extend is vector const 0
>     (define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
>     @@ -1187,6 +1346,61 @@
>     }
>     [(set_attr "type" "vector")])
>     +;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
>     +;; where the mrege of mask_len_extend is vector const 0
>     +(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
>     +  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
>     +        (unspec:<V_DOUBLE_EXTEND_VEL> [
>     +          (if_then_else:<V_DOUBLE_EXTEND>
>     +            (unspec:<VM> [
>     +              (match_operand 2 "vector_length_operand")
>     +              (const_int 0)
>     +              (const_int 0)
>     +              (reg:SI VL_REGNUM)
>     +              (reg:SI VTYPE_REGNUM)
>     +            ] UNSPEC_VPREDICATE)
>     +            (vec_merge:<V_DOUBLE_EXTEND>
>     +              (float_extend:<V_DOUBLE_EXTEND>
>     +                (match_operand:VF_HS_NO_M8 3 "register_operand"))
>     +              (if_then_else:<V_DOUBLE_EXTEND>
>     +                (unspec:<VM> [
>     +                  (match_operand:<VM> 4
>     "vector_all_trues_mask_operand")
>     +                  (match_operand 5 "vector_length_operand")
>     +                  (match_operand 6 "const_int_operand")
>     +                  (match_operand 7 "const_int_operand")
>     +                  (match_operand 8 "const_1_or_2_operand")
>     +                  (reg:SI VL_REGNUM)
>     +                  (reg:SI VTYPE_REGNUM)
>     +                ] UNSPEC_VPREDICATE)
>     +                (match_operand:<V_DOUBLE_EXTEND> 9
>     "vector_const_0_operand")
>     +                (match_operand:<V_DOUBLE_EXTEND> 10
>     "vector_merge_operand"))
>     +              (match_operand:<VM> 1 "register_operand"))
>     +            (if_then_else:<V_DOUBLE_EXTEND>
>     +              (unspec:<VM> [
>     +                (match_dup 4)
>     +                (match_dup 5)
>     +                (match_dup 6)
>     +                (match_dup 7)
>     +                (match_dup 8)
>     +                (reg:SI VL_REGNUM)
>     +                (reg:SI VTYPE_REGNUM)
>     +              ] UNSPEC_VPREDICATE)
>     +              (match_dup 9)
>     +              (match_dup 10)))
>     +        ] UNSPEC_REDUC_SUM_UNORDERED))]
>     +  "TARGET_VECTOR && can_create_pseudo_p ()"
>     +  "#"
>     +  "&& 1"
>     +  [(const_int 0)]
>     +{
>     +  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
>     +  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
>     +                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
>     +                                  ops, CONST0_RTX
>     (<V_DOUBLE_EXTEND_VEL>mode));
>     +  DONE;
>     +}
>     +[(set_attr "type" "vector")])
>     +
>     ;;
>     =============================================================================
>     ;; Misc combine patterns
>     ;;
>     =============================================================================
>     diff --git
>     a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
>     index 22a71048684..47889f3a1cd 100644
>     ---
>     a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
>     +++
>     b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
>     @@ -15,16 +15,27 @@
>     #define
>     TEST_ALL(TEST)                                                         \
>         TEST (int16_t, int8_t,
>     16)                                                   \
>     +  TEST (int32_t, int8_t,
>     8)                                                    \
>         TEST (int32_t, int16_t,
>     8)                                                   \
>     +  TEST (int64_t, int8_t,
>     4)                                                    \
>     +  TEST (int64_t, int16_t,
>     4)                                                   \
>         TEST (int64_t, int32_t,
>     4)                                                   \
>         TEST (uint16_t, uint8_t,
>     16)                                                 \
>     +  TEST (uint32_t, uint8_t,
>     8)                                                  \
>         TEST (uint32_t, uint16_t,
>     8)                                                 \
>     +  TEST (uint64_t, uint8_t,
>     4)                                                  \
>     +  TEST (uint64_t, uint16_t,
>     4)                                                 \
>         TEST (uint64_t, uint32_t,
>     4)                                                 \
>         TEST (float, _Float16,
>     8)                                                    \
>     +  TEST (double, _Float16,
>     4)                                                   \
>         TEST (double, float, 4)
>     TEST_ALL (TEST_TYPE)
>     -/* { dg-final { scan-assembler-times
>     {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
>     /* { dg-final { scan-assembler-times
>     {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
>     /* { dg-final { scan-assembler-times
>     {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
>     diff --git
>     a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
>     index 7c8fedd072b..662d1351215 100644
>     ---
>     a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
>     +++
>     b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
>     @@ -1,30 +1,12 @@
>     /* { dg-do compile } */
>     /* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d
>     --param riscv-autovec-preference=scalable --param
>     riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
>     -#include <stdint-gcc.h>
>     -#define TEST_TYPE(TYPE1, TYPE2,
>     N)                                             \
>     -  __attribute__
>     ((noipa))                                                      \
>     -  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict
>     pred)      \
>     - 
>     {                                                                            \
>     -    TYPE1 sum =
>     0;                                                             \
>     -    for (int i = 0; i < N; i +=
>     1)                                             \
>     -      if
>     (pred[i])                                                             \
>     - sum +=
>     a[i];                                                           \
>     -    return
>     sum;                                                                \
>     -  }
>     +#include "cond_widen_reduc-1.c"
>     -#define
>     TEST_ALL(TEST)                                                         \
>     -  TEST (int16_t, int8_t,
>     16)                                                   \
>     -  TEST (int32_t, int16_t,
>     8)                                                   \
>     -  TEST (int64_t, int32_t,
>     4)                                                   \
>     -  TEST (uint16_t, uint8_t,
>     16)                                                 \
>     -  TEST (uint32_t, uint16_t,
>     8)                                                 \
>     -  TEST (uint64_t, uint32_t,
>     4)                                                 \
>     -  TEST (float, _Float16,
>     8)                                                    \
>     -  TEST (double, float, 4)
>     -
>     -TEST_ALL (TEST_TYPE)
>     -
>     -/* { dg-final { scan-assembler-times
>     {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
>     /* { dg-final { scan-assembler-times
>     {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
>     /* { dg-final { scan-assembler-times
>     {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
>     +/* { dg-final { scan-assembler-times
>     {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
>     -- 
>     2.36.3
>
  
Robin Dapp Nov. 7, 2023, 7:57 a.m. UTC | #3
Thanks, what I was slightly concerned about is that we now have
the implicit assumption that the initial value is 0.  I mean
that's what the vectorizer does for reductions but theoretically,
wouldn't we also combine other values into 0 now?

Regards
 Robin
  
Lehua Ding Nov. 7, 2023, 8:04 a.m. UTC | #4
Hi Robin,

On 2023/11/7 15:57, Robin Dapp wrote:
> Thanks, what I was slightly concerned about is that we now have
> the implicit assumption that the initial value is 0.  I mean
> that's what the vectorizer does for reductions but theoretically,
> wouldn't we also combine other values into 0 now?

Sorry, I don't quite understand what you mean. I think this combine is
only safe when the initial value is 0, because the combine actually
discards the operation of adding 0 (via the mask operand).
  

Patch

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index d0f8b3cde4e..3c87e66ea49 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -194,6 +194,84 @@ 
 }
 [(set_attr "type" "vector")])
 
+;; Combine sign_extend/zero_extend(vf2) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_double_trunc><mode>"
+  [(set (match_operand:VWEXTI 0 "register_operand")
+    (if_then_else:VWEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTI
+        (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf2 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf4) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_quad_trunc><mode>"
+  [(set (match_operand:VQEXTI 0 "register_operand")
+    (if_then_else:VQEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VQEXTI
+        (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 2 "register_operand"))
+        (match_operand:VQEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf4 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf8) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_oct_trunc><mode>"
+  [(set (match_operand:VOEXTI 0 "register_operand")
+    (if_then_else:VOEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VOEXTI
+        (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 2 "register_operand"))
+        (match_operand:VOEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf8 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine trunc(vf2) + vcond_mask
 (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -235,6 +313,32 @@ 
 }
 [(set_attr "type" "vector")])
 
+;; Combine FP extend(vf2) and vcond_mask_len
+(define_insn_and_split "*cond_len_extend<v_double_trunc><mode>"
+  [(set (match_operand:VWEXTF_ZVFHMIN 0 "register_operand")
+    (if_then_else:VWEXTF_ZVFHMIN
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTF_ZVFHMIN
+        (float_extend:VWEXTF_ZVFHMIN (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTF_ZVFHMIN 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_extend<mode> (operands[0], operands[3], operands[1], operands[2],
+                                    operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine FP trunc(vf2) + vcond_mask
 (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -1151,6 +1255,61 @@ 
 }
 [(set_attr "type" "vector")])
 
+;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
+;; where the merge of mask_len_extend is vector const 0
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (any_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VI_QHS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+                                  riscv_vector::REDUCE_OP_M,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine mask_extend + vfredsum to mask_vfwredusum
 ;; where the mrege of mask_extend is vector const 0
 (define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
@@ -1187,6 +1346,61 @@ 
 }
 [(set_attr "type" "vector")])
 
+;; Combine mask_len_extend + vfredusum to mask_vfwredusum
+;; where the merge of mask_len_extend is vector const 0
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (float_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VF_HS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM_UNORDERED))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; =============================================================================
 ;; Misc combine patterns
 ;; =============================================================================
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
index 22a71048684..47889f3a1cd 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
@@ -15,16 +15,27 @@ 
 
 #define TEST_ALL(TEST)                                                         \
   TEST (int16_t, int8_t, 16)                                                   \
+  TEST (int32_t, int8_t, 8)                                                    \
   TEST (int32_t, int16_t, 8)                                                   \
+  TEST (int64_t, int8_t, 4)                                                    \
+  TEST (int64_t, int16_t, 4)                                                   \
   TEST (int64_t, int32_t, 4)                                                   \
   TEST (uint16_t, uint8_t, 16)                                                 \
+  TEST (uint32_t, uint8_t, 8)                                                  \
   TEST (uint32_t, uint16_t, 8)                                                 \
+  TEST (uint64_t, uint8_t, 4)                                                  \
+  TEST (uint64_t, uint16_t, 4)                                                 \
   TEST (uint64_t, uint32_t, 4)                                                 \
   TEST (float, _Float16, 8)                                                    \
+  TEST (double, _Float16, 4)                                                   \
   TEST (double, float, 4)
 
 TEST_ALL (TEST_TYPE)
 
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
index 7c8fedd072b..662d1351215 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
@@ -1,30 +1,12 @@ 
 /* { dg-do compile } */
 /* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
-#include <stdint-gcc.h>
 
-#define TEST_TYPE(TYPE1, TYPE2, N)                                             \
-  __attribute__ ((noipa))                                                      \
-  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      \
-  {                                                                            \
-    TYPE1 sum = 0;                                                             \
-    for (int i = 0; i < N; i += 1)                                             \
-      if (pred[i])                                                             \
-	sum += a[i];                                                           \
-    return sum;                                                                \
-  }
+#include "cond_widen_reduc-1.c"
 
-#define TEST_ALL(TEST)                                                         \
-  TEST (int16_t, int8_t, 16)                                                   \
-  TEST (int32_t, int16_t, 8)                                                   \
-  TEST (int64_t, int32_t, 4)                                                   \
-  TEST (uint16_t, uint8_t, 16)                                                 \
-  TEST (uint32_t, uint16_t, 8)                                                 \
-  TEST (uint64_t, uint32_t, 4)                                                 \
-  TEST (float, _Float16, 8)                                                    \
-  TEST (double, float, 4)
-
-TEST_ALL (TEST_TYPE)
-
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */