RISC-V: Add RVV narrow shift right lowering auto-vectorization
Checks
Commit Message
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Optimize the following auto-vectorization codes:
void foo (int16_t * __restrict a, int32_t * __restrict b, int32_t c, int n)
{
for (int i = 0; i < n; i++)
a[i] = b[i] >> c;
}
Before this patch:
foo:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e32,m1,ta,ma
vle32.v v1,0(a1)
vsetvli a4,zero,e32,m1,ta,ma
vsra.vx v1,v1,a2
vsetvli zero,zero,e16,mf2,ta,ma
slli a7,a5,2
vncvt.x.x.w v1,v1
slli a6,a5,1
vsetvli zero,a5,e16,mf2,ta,ma
sub a3,a3,a5
vse16.v v1,0(a0)
add a1,a1,a7
add a0,a0,a6
bne a3,zero,.L3
.L5:
ret
After this patch:
foo:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e32,m1,ta,ma
vle32.v v1,0(a1)
vsetvli a7,zero,e16,mf2,ta,ma
slli a6,a5,2
vnsra.wx v1,v1,a2
slli a4,a5,1
vsetvli zero,a5,e16,mf2,ta,ma
sub a3,a3,a5
vse16.v v1,0(a0)
add a1,a1,a6
add a0,a0,a4
bne a3,zero,.L3
.L5:
ret
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*v<any_shiftrt:optab><any_extend:optab>trunc<mode>): New pattern.
(*<any_shiftrt:optab>trunc<mode>): Ditto.
* config/riscv/autovec.md (<optab><mode>3): Change to define_insn_and_split.
(v<optab><mode>3): Ditto.
(trunc<mode><v_double_trunc>2): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/binop/narrow-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c: New test.
---
gcc/config/riscv/autovec-opt.md | 46 +++++++++++++++++
gcc/config/riscv/autovec.md | 43 ++++++++++------
.../riscv/rvv/autovec/binop/narrow-1.c | 31 ++++++++++++
.../riscv/rvv/autovec/binop/narrow-2.c | 32 ++++++++++++
.../riscv/rvv/autovec/binop/narrow-3.c | 31 ++++++++++++
.../riscv/rvv/autovec/binop/narrow_run-1.c | 50 +++++++++++++++++++
.../riscv/rvv/autovec/binop/narrow_run-2.c | 46 +++++++++++++++++
.../riscv/rvv/autovec/binop/narrow_run-3.c | 46 +++++++++++++++++
8 files changed, 311 insertions(+), 14 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
Comments
Is this patch ok for trunk?
juzhe.zhong@rivai.ai
From: juzhe.zhong
Date: 2023-06-12 10:41
To: gcc-patches
CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Optimize the following auto-vectorization codes:
void foo (int16_t * __restrict a, int32_t * __restrict b, int32_t c, int n)
{
for (int i = 0; i < n; i++)
a[i] = b[i] >> c;
}
Before this patch:
foo:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e32,m1,ta,ma
vle32.v v1,0(a1)
vsetvli a4,zero,e32,m1,ta,ma
vsra.vx v1,v1,a2
vsetvli zero,zero,e16,mf2,ta,ma
slli a7,a5,2
vncvt.x.x.w v1,v1
slli a6,a5,1
vsetvli zero,a5,e16,mf2,ta,ma
sub a3,a3,a5
vse16.v v1,0(a0)
add a1,a1,a7
add a0,a0,a6
bne a3,zero,.L3
.L5:
ret
After this patch:
foo:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e32,m1,ta,ma
vle32.v v1,0(a1)
vsetvli a7,zero,e16,mf2,ta,ma
slli a6,a5,2
vnsra.wx v1,v1,a2
slli a4,a5,1
vsetvli zero,a5,e16,mf2,ta,ma
sub a3,a3,a5
vse16.v v1,0(a0)
add a1,a1,a6
add a0,a0,a4
bne a3,zero,.L3
.L5:
ret
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*v<any_shiftrt:optab><any_extend:optab>trunc<mode>): New pattern.
(*<any_shiftrt:optab>trunc<mode>): Ditto.
* config/riscv/autovec.md (<optab><mode>3): Change to define_insn_and_split.
(v<optab><mode>3): Ditto.
(trunc<mode><v_double_trunc>2): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/binop/narrow-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c: New test.
---
gcc/config/riscv/autovec-opt.md | 46 +++++++++++++++++
gcc/config/riscv/autovec.md | 43 ++++++++++------
.../riscv/rvv/autovec/binop/narrow-1.c | 31 ++++++++++++
.../riscv/rvv/autovec/binop/narrow-2.c | 32 ++++++++++++
.../riscv/rvv/autovec/binop/narrow-3.c | 31 ++++++++++++
.../riscv/rvv/autovec/binop/narrow_run-1.c | 50 +++++++++++++++++++
.../riscv/rvv/autovec/binop/narrow_run-2.c | 46 +++++++++++++++++
.../riscv/rvv/autovec/binop/narrow_run-3.c | 46 +++++++++++++++++
8 files changed, 311 insertions(+), 14 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 7bb93eed220..aef28e445e1 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -330,3 +330,49 @@
}
[(set_attr "type" "viwmuladd")
(set_attr "mode" "<V_DOUBLE_TRUNC>")])
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary narrow shifts.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vnsrl.wv/vnsrl.wx/vnsrl.wi
+;; - vnsra.wv/vnsra.wx/vnsra.wi
+;; -------------------------------------------------------------------------
+
+(define_insn_and_split "*v<any_shiftrt:optab><any_extend:optab>trunc<mode>"
+ [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr,vr")
+ (truncate:<V_DOUBLE_TRUNC>
+ (any_shiftrt:VWEXTI
+ (match_operand:VWEXTI 1 "register_operand" " vr,vr")
+ (any_extend:VWEXTI
+ (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" " vr,vk")))))]
+ "TARGET_VECTOR"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ insn_code icode = code_for_pred_narrow (<any_shiftrt:CODE>, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
+ DONE;
+}
+ [(set_attr "type" "vnshift")
+ (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+
+(define_insn_and_split "*<any_shiftrt:optab>trunc<mode>"
+ [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
+ (truncate:<V_DOUBLE_TRUNC>
+ (any_shiftrt:VWEXTI
+ (match_operand:VWEXTI 1 "register_operand" " vr")
+ (match_operand:<VEL> 2 "csr_operand" " rK"))))]
+ "TARGET_VECTOR"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ insn_code icode = code_for_pred_narrow_scalar (<any_shiftrt:CODE>, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
+ DONE;
+}
+ [(set_attr "type" "vnshift")
+ (set_attr "mode" "<V_DOUBLE_TRUNC>")])
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index b7070099f29..eadc2c5b595 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -150,18 +150,23 @@
;; - vsll.vi/vsra.vi/vsrl.vi
;; -------------------------------------------------------------------------
-(define_expand "<optab><mode>3"
- [(set (match_operand:VI 0 "register_operand")
+(define_insn_and_split "<optab><mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=vr")
(any_shift:VI
- (match_operand:VI 1 "register_operand")
- (match_operand:<VEL> 2 "csr_operand")))]
+ (match_operand:VI 1 "register_operand" " vr")
+ (match_operand:<VEL> 2 "csr_operand" " rK")))]
"TARGET_VECTOR"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
{
operands[2] = gen_lowpart (Pmode, operands[2]);
riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
riscv_vector::RVV_BINOP, operands);
DONE;
-})
+}
+ [(set_attr "type" "vshift")
+ (set_attr "mode" "<MODE>")])
;; -------------------------------------------------------------------------
;; ---- [INT] Binary shifts by scalar.
@@ -170,17 +175,22 @@
;; - vsll.vv/vsra.vv/vsrl.vv
;; -------------------------------------------------------------------------
-(define_expand "v<optab><mode>3"
- [(set (match_operand:VI 0 "register_operand")
+(define_insn_and_split "v<optab><mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=vr,vr")
(any_shift:VI
- (match_operand:VI 1 "register_operand")
- (match_operand:VI 2 "vector_shift_operand")))]
+ (match_operand:VI 1 "register_operand" " vr,vr")
+ (match_operand:VI 2 "vector_shift_operand" " vr,vk")))]
"TARGET_VECTOR"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
{
riscv_vector::emit_vlmax_insn (code_for_pred (<CODE>, <MODE>mode),
riscv_vector::RVV_BINOP, operands);
DONE;
-})
+}
+ [(set_attr "type" "vshift")
+ (set_attr "mode" "<MODE>")])
;; -------------------------------------------------------------------------
;; ---- [BOOL] Binary logical operations
@@ -395,16 +405,21 @@
;; -------------------------------------------------------------------------
;; - vncvt.x.x.w
;; -------------------------------------------------------------------------
-(define_expand "trunc<mode><v_double_trunc>2"
- [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
+(define_insn_and_split "trunc<mode><v_double_trunc>2"
+ [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
(truncate:<V_DOUBLE_TRUNC>
- (match_operand:VWEXTI 1 "register_operand")))]
+ (match_operand:VWEXTI 1 "register_operand" " vr")))]
"TARGET_VECTOR"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
{
insn_code icode = code_for_pred_trunc (<MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
DONE;
-})
+}
+ [(set_attr "type" "vshift")
+ (set_attr "mode" "<MODE>")])
;; -------------------------------------------------------------------------
;; Truncation to a mode whose inner mode size is a quarter of mode's.
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
new file mode 100644
index 00000000000..3de8d85b52d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2) \
+ __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
+ TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 *__restrict b, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = a[i] >> b[i]; \
+ }
+
+#define TEST_ALL() \
+ TEST_TYPE (int16_t, int8_t) \
+ TEST_TYPE (int16_t, uint8_t) \
+ TEST_TYPE (uint16_t, int8_t) \
+ TEST_TYPE (uint16_t, uint8_t) \
+ TEST_TYPE (int32_t, int16_t) \
+ TEST_TYPE (int32_t, uint16_t) \
+ TEST_TYPE (uint32_t, int16_t) \
+ TEST_TYPE (uint32_t, uint16_t) \
+ TEST_TYPE (int64_t, int32_t) \
+ TEST_TYPE (int64_t, uint32_t) \
+ TEST_TYPE (uint64_t, int32_t) \
+ TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+
+/* { dg-final { scan-assembler-times {\tvnsra\.wv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvnsrl\.wv} 5 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
new file mode 100644
index 00000000000..e5c2e37f5fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2) \
+ __attribute__ (( \
+ noipa)) void vnshift_##TYPE1##_##TYPE2 (TYPE2 *__restrict dst, \
+ TYPE1 *__restrict a, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = a[i] >> 7; \
+ }
+
+#define TEST_ALL() \
+ TEST_TYPE (int16_t, int8_t) \
+ TEST_TYPE (int16_t, uint8_t) \
+ TEST_TYPE (uint16_t, int8_t) \
+ TEST_TYPE (uint16_t, uint8_t) \
+ TEST_TYPE (int32_t, int16_t) \
+ TEST_TYPE (int32_t, uint16_t) \
+ TEST_TYPE (uint32_t, int16_t) \
+ TEST_TYPE (uint32_t, uint16_t) \
+ TEST_TYPE (int64_t, int32_t) \
+ TEST_TYPE (int64_t, uint32_t) \
+ TEST_TYPE (uint64_t, int32_t) \
+ TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+
+/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
new file mode 100644
index 00000000000..3b288466394
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2) \
+ __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
+ TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 b, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = a[i] >> b; \
+ }
+
+#define TEST_ALL() \
+ TEST_TYPE (int16_t, int8_t) \
+ TEST_TYPE (int16_t, uint8_t) \
+ TEST_TYPE (uint16_t, int8_t) \
+ TEST_TYPE (uint16_t, uint8_t) \
+ TEST_TYPE (int32_t, int16_t) \
+ TEST_TYPE (int32_t, uint16_t) \
+ TEST_TYPE (uint32_t, int16_t) \
+ TEST_TYPE (uint32_t, uint16_t) \
+ TEST_TYPE (int64_t, int32_t) \
+ TEST_TYPE (int64_t, uint32_t) \
+ TEST_TYPE (uint64_t, int32_t) \
+ TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+
+/* { dg-final { scan-assembler-times {\tvnsra\.wx} 4 } } */
+/* { dg-final { scan-assembler-times {\tvnsrl\.wx} 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
new file mode 100644
index 00000000000..2a898104fa8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include <assert.h>
+#include "narrow-1.c"
+
+#define RUN(TYPE1, TYPE2, SZ) \
+ TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ TYPE2 b##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
+ b##TYPE1##_##TYPE2##_##SZ[i] = i % (sizeof (TYPE2) * 3); \
+ } \
+ vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
+ a##TYPE1##_##TYPE2##_##SZ, \
+ b##TYPE1##_##TYPE2##_##SZ, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
+ == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] \
+ >> b##TYPE1##_##TYPE2##_##SZ[i])); \
+ }
+
+#define RUN_ALL(SZ) \
+ RUN (int16_t, int8_t, SZ) \
+ RUN (int16_t, uint8_t, SZ) \
+ RUN (uint16_t, int8_t, SZ) \
+ RUN (uint16_t, uint8_t, SZ) \
+ RUN (int32_t, int16_t, SZ) \
+ RUN (int32_t, uint16_t, SZ) \
+ RUN (uint32_t, int16_t, SZ) \
+ RUN (uint32_t, uint16_t, SZ) \
+ RUN (int64_t, int32_t, SZ) \
+ RUN (int64_t, uint32_t, SZ) \
+ RUN (uint64_t, int32_t, SZ) \
+ RUN (uint64_t, uint32_t, SZ)
+
+int
+main ()
+{
+ RUN_ALL (15)
+ RUN_ALL (16)
+ RUN_ALL (17)
+ RUN_ALL (127)
+ RUN_ALL (128)
+ RUN_ALL (129)
+ RUN_ALL (512)
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
new file mode 100644
index 00000000000..1630ba1a5f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include <assert.h>
+#include "narrow-2.c"
+
+#define RUN(TYPE1, TYPE2, SZ) \
+ TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
+ } \
+ vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
+ a##TYPE1##_##TYPE2##_##SZ, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
+ == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 7)); \
+ }
+
+#define RUN_ALL(SZ) \
+ RUN (int16_t, int8_t, SZ) \
+ RUN (int16_t, uint8_t, SZ) \
+ RUN (uint16_t, int8_t, SZ) \
+ RUN (uint16_t, uint8_t, SZ) \
+ RUN (int32_t, int16_t, SZ) \
+ RUN (int32_t, uint16_t, SZ) \
+ RUN (uint32_t, int16_t, SZ) \
+ RUN (uint32_t, uint16_t, SZ) \
+ RUN (int64_t, int32_t, SZ) \
+ RUN (int64_t, uint32_t, SZ) \
+ RUN (uint64_t, int32_t, SZ) \
+ RUN (uint64_t, uint32_t, SZ)
+
+int
+main ()
+{
+ RUN_ALL (15)
+ RUN_ALL (16)
+ RUN_ALL (17)
+ RUN_ALL (127)
+ RUN_ALL (128)
+ RUN_ALL (129)
+ RUN_ALL (512)
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
new file mode 100644
index 00000000000..7638851e4fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include <assert.h>
+#include "narrow-3.c"
+
+#define RUN(TYPE1, TYPE2, SZ) \
+ TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
+ } \
+ vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
+ a##TYPE1##_##TYPE2##_##SZ, 9, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
+ == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 9)); \
+ }
+
+#define RUN_ALL(SZ) \
+ RUN (int16_t, int8_t, SZ) \
+ RUN (int16_t, uint8_t, SZ) \
+ RUN (uint16_t, int8_t, SZ) \
+ RUN (uint16_t, uint8_t, SZ) \
+ RUN (int32_t, int16_t, SZ) \
+ RUN (int32_t, uint16_t, SZ) \
+ RUN (uint32_t, int16_t, SZ) \
+ RUN (uint32_t, uint16_t, SZ) \
+ RUN (int64_t, int32_t, SZ) \
+ RUN (int64_t, uint32_t, SZ) \
+ RUN (uint64_t, int32_t, SZ) \
+ RUN (uint64_t, uint32_t, SZ)
+
+int
+main ()
+{
+ RUN_ALL (15)
+ RUN_ALL (16)
+ RUN_ALL (17)
+ RUN_ALL (127)
+ RUN_ALL (128)
+ RUN_ALL (129)
+ RUN_ALL (512)
+}
--
2.36.3
We have two styles of insn/split conditions for those define_insn_and_split
patterns: "TARGET_VECTOR"/"&& can_create_pseudo_p ()" and "TARGET_VECTOR &&
can_create_pseudo_p ()"/"&& 1". Could you unify them all to the latter form? I
feel that would be safer, since those patterns are really only valid
before RA (can_create_pseudo_p () == true). They are mostly used by the
combine pass, so this is mostly safe, but IMO we should fix it soon
rather than wait until we hit a problem later.
This patch is OK as it is; I would like a separate follow-up patch
to fix all of those issues.
On Mon, Jun 12, 2023 at 8:27 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> Is this patch ok for trunk?
>
>
>
> juzhe.zhong@rivai.ai
>
> From: juzhe.zhong
> Date: 2023-06-12 10:41
> To: gcc-patches
> CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
> Subject: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> Optimize the following auto-vectorization codes:
> void foo (int16_t * __restrict a, int32_t * __restrict b, int32_t c, int n)
> {
> for (int i = 0; i < n; i++)
> a[i] = b[i] >> c;
> }
>
> Before this patch:
> foo:
> ble a3,zero,.L5
> .L3:
> vsetvli a5,a3,e32,m1,ta,ma
> vle32.v v1,0(a1)
> vsetvli a4,zero,e32,m1,ta,ma
> vsra.vx v1,v1,a2
> vsetvli zero,zero,e16,mf2,ta,ma
> slli a7,a5,2
> vncvt.x.x.w v1,v1
> slli a6,a5,1
> vsetvli zero,a5,e16,mf2,ta,ma
> sub a3,a3,a5
> vse16.v v1,0(a0)
> add a1,a1,a7
> add a0,a0,a6
> bne a3,zero,.L3
> .L5:
> ret
>
> After this patch:
> foo:
> ble a3,zero,.L5
> .L3:
> vsetvli a5,a3,e32,m1,ta,ma
> vle32.v v1,0(a1)
> vsetvli a7,zero,e16,mf2,ta,ma
> slli a6,a5,2
> vnsra.wx v1,v1,a2
> slli a4,a5,1
> vsetvli zero,a5,e16,mf2,ta,ma
> sub a3,a3,a5
> vse16.v v1,0(a0)
> add a1,a1,a6
> add a0,a0,a4
> bne a3,zero,.L3
> .L5:
> ret
>
> gcc/ChangeLog:
>
> * config/riscv/autovec-opt.md (*v<any_shiftrt:optab><any_extend:optab>trunc<mode>): New pattern.
> (*<any_shiftrt:optab>trunc<mode>): Ditto.
> * config/riscv/autovec.md (<optab><mode>3): Change to define_insn_and_split.
> (v<optab><mode>3): Ditto.
> (trunc<mode><v_double_trunc>2): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/binop/narrow-1.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow-2.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow-3.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c: New test.
>
> ---
> gcc/config/riscv/autovec-opt.md | 46 +++++++++++++++++
> gcc/config/riscv/autovec.md | 43 ++++++++++------
> .../riscv/rvv/autovec/binop/narrow-1.c | 31 ++++++++++++
> .../riscv/rvv/autovec/binop/narrow-2.c | 32 ++++++++++++
> .../riscv/rvv/autovec/binop/narrow-3.c | 31 ++++++++++++
> .../riscv/rvv/autovec/binop/narrow_run-1.c | 50 +++++++++++++++++++
> .../riscv/rvv/autovec/binop/narrow_run-2.c | 46 +++++++++++++++++
> .../riscv/rvv/autovec/binop/narrow_run-3.c | 46 +++++++++++++++++
> 8 files changed, 311 insertions(+), 14 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
>
> diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
> index 7bb93eed220..aef28e445e1 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -330,3 +330,49 @@
> }
> [(set_attr "type" "viwmuladd")
> (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> +
> +;; -------------------------------------------------------------------------
> +;; ---- [INT] Binary narrow shifts.
> +;; -------------------------------------------------------------------------
> +;; Includes:
> +;; - vnsrl.wv/vnsrl.wx/vnsrl.wi
> +;; - vnsra.wv/vnsra.wx/vnsra.wi
> +;; -------------------------------------------------------------------------
> +
> +(define_insn_and_split "*v<any_shiftrt:optab><any_extend:optab>trunc<mode>"
> + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr,vr")
> + (truncate:<V_DOUBLE_TRUNC>
> + (any_shiftrt:VWEXTI
> + (match_operand:VWEXTI 1 "register_operand" " vr,vr")
> + (any_extend:VWEXTI
> + (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" " vr,vk")))))]
> + "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> +{
> + insn_code icode = code_for_pred_narrow (<any_shiftrt:CODE>, <MODE>mode);
> + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
> + DONE;
> +}
> + [(set_attr "type" "vnshift")
> + (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> +
> +(define_insn_and_split "*<any_shiftrt:optab>trunc<mode>"
> + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
> + (truncate:<V_DOUBLE_TRUNC>
> + (any_shiftrt:VWEXTI
> + (match_operand:VWEXTI 1 "register_operand" " vr")
> + (match_operand:<VEL> 2 "csr_operand" " rK"))))]
> + "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> +{
> + operands[2] = gen_lowpart (Pmode, operands[2]);
> + insn_code icode = code_for_pred_narrow_scalar (<any_shiftrt:CODE>, <MODE>mode);
> + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
> + DONE;
> +}
> + [(set_attr "type" "vnshift")
> + (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index b7070099f29..eadc2c5b595 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -150,18 +150,23 @@
> ;; - vsll.vi/vsra.vi/vsrl.vi
> ;; -------------------------------------------------------------------------
> -(define_expand "<optab><mode>3"
> - [(set (match_operand:VI 0 "register_operand")
> +(define_insn_and_split "<optab><mode>3"
> + [(set (match_operand:VI 0 "register_operand" "=vr")
> (any_shift:VI
> - (match_operand:VI 1 "register_operand")
> - (match_operand:<VEL> 2 "csr_operand")))]
> + (match_operand:VI 1 "register_operand" " vr")
> + (match_operand:<VEL> 2 "csr_operand" " rK")))]
> "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> {
> operands[2] = gen_lowpart (Pmode, operands[2]);
> riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
> riscv_vector::RVV_BINOP, operands);
> DONE;
> -})
> +}
> + [(set_attr "type" "vshift")
> + (set_attr "mode" "<MODE>")])
> ;; -------------------------------------------------------------------------
> ;; ---- [INT] Binary shifts by scalar.
> @@ -170,17 +175,22 @@
> ;; - vsll.vv/vsra.vv/vsrl.vv
> ;; -------------------------------------------------------------------------
> -(define_expand "v<optab><mode>3"
> - [(set (match_operand:VI 0 "register_operand")
> +(define_insn_and_split "v<optab><mode>3"
> + [(set (match_operand:VI 0 "register_operand" "=vr,vr")
> (any_shift:VI
> - (match_operand:VI 1 "register_operand")
> - (match_operand:VI 2 "vector_shift_operand")))]
> + (match_operand:VI 1 "register_operand" " vr,vr")
> + (match_operand:VI 2 "vector_shift_operand" " vr,vk")))]
> "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> {
> riscv_vector::emit_vlmax_insn (code_for_pred (<CODE>, <MODE>mode),
> riscv_vector::RVV_BINOP, operands);
> DONE;
> -})
> +}
> + [(set_attr "type" "vshift")
> + (set_attr "mode" "<MODE>")])
> ;; -------------------------------------------------------------------------
> ;; ---- [BOOL] Binary logical operations
> @@ -395,16 +405,21 @@
> ;; -------------------------------------------------------------------------
> ;; - vncvt.x.x.w
> ;; -------------------------------------------------------------------------
> -(define_expand "trunc<mode><v_double_trunc>2"
> - [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
> +(define_insn_and_split "trunc<mode><v_double_trunc>2"
> + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
> (truncate:<V_DOUBLE_TRUNC>
> - (match_operand:VWEXTI 1 "register_operand")))]
> + (match_operand:VWEXTI 1 "register_operand" " vr")))]
> "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> {
> insn_code icode = code_for_pred_trunc (<MODE>mode);
> riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
> DONE;
> -})
> +}
> + [(set_attr "type" "vshift")
> + (set_attr "mode" "<MODE>")])
> ;; -------------------------------------------------------------------------
> ;; Truncation to a mode whose inner mode size is a quarter of mode's.
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> new file mode 100644
> index 00000000000..3de8d85b52d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE1, TYPE2) \
> + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
> + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 *__restrict b, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = a[i] >> b[i]; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (int16_t, int8_t) \
> + TEST_TYPE (int16_t, uint8_t) \
> + TEST_TYPE (uint16_t, int8_t) \
> + TEST_TYPE (uint16_t, uint8_t) \
> + TEST_TYPE (int32_t, int16_t) \
> + TEST_TYPE (int32_t, uint16_t) \
> + TEST_TYPE (uint32_t, int16_t) \
> + TEST_TYPE (uint32_t, uint16_t) \
> + TEST_TYPE (int64_t, int32_t) \
> + TEST_TYPE (int64_t, uint32_t) \
> + TEST_TYPE (uint64_t, int32_t) \
> + TEST_TYPE (uint64_t, uint32_t)
> +
> +TEST_ALL ()
> +
> +/* { dg-final { scan-assembler-times {\tvnsra\.wv} 6 } } */
> +/* { dg-final { scan-assembler-times {\tvnsrl\.wv} 5 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> new file mode 100644
> index 00000000000..e5c2e37f5fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE1, TYPE2) \
> + __attribute__ (( \
> + noipa)) void vnshift_##TYPE1##_##TYPE2 (TYPE2 *__restrict dst, \
> + TYPE1 *__restrict a, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = a[i] >> 7; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (int16_t, int8_t) \
> + TEST_TYPE (int16_t, uint8_t) \
> + TEST_TYPE (uint16_t, int8_t) \
> + TEST_TYPE (uint16_t, uint8_t) \
> + TEST_TYPE (int32_t, int16_t) \
> + TEST_TYPE (int32_t, uint16_t) \
> + TEST_TYPE (uint32_t, int16_t) \
> + TEST_TYPE (uint32_t, uint16_t) \
> + TEST_TYPE (int64_t, int32_t) \
> + TEST_TYPE (int64_t, uint32_t) \
> + TEST_TYPE (uint64_t, int32_t) \
> + TEST_TYPE (uint64_t, uint32_t)
> +
> +TEST_ALL ()
> +
> +/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */
> +/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> new file mode 100644
> index 00000000000..3b288466394
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE1, TYPE2) \
> + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
> + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 b, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = a[i] >> b; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (int16_t, int8_t) \
> + TEST_TYPE (int16_t, uint8_t) \
> + TEST_TYPE (uint16_t, int8_t) \
> + TEST_TYPE (uint16_t, uint8_t) \
> + TEST_TYPE (int32_t, int16_t) \
> + TEST_TYPE (int32_t, uint16_t) \
> + TEST_TYPE (uint32_t, int16_t) \
> + TEST_TYPE (uint32_t, uint16_t) \
> + TEST_TYPE (int64_t, int32_t) \
> + TEST_TYPE (int64_t, uint32_t) \
> + TEST_TYPE (uint64_t, int32_t) \
> + TEST_TYPE (uint64_t, uint32_t)
> +
> +TEST_ALL ()
> +
> +/* { dg-final { scan-assembler-times {\tvnsra\.wx} 4 } } */
> +/* { dg-final { scan-assembler-times {\tvnsrl\.wx} 4 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> new file mode 100644
> index 00000000000..2a898104fa8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> @@ -0,0 +1,50 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> +
> +#include <assert.h>
> +#include "narrow-1.c"
> +
> +#define RUN(TYPE1, TYPE2, SZ) \
> + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + TYPE2 b##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> + b##TYPE1##_##TYPE2##_##SZ[i] = i % (sizeof (TYPE2) * 3); \
> + } \
> + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> + a##TYPE1##_##TYPE2##_##SZ, \
> + b##TYPE1##_##TYPE2##_##SZ, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] \
> + >> b##TYPE1##_##TYPE2##_##SZ[i])); \
> + }
> +
> +#define RUN_ALL(SZ) \
> + RUN (int16_t, int8_t, SZ) \
> + RUN (int16_t, uint8_t, SZ) \
> + RUN (uint16_t, int8_t, SZ) \
> + RUN (uint16_t, uint8_t, SZ) \
> + RUN (int32_t, int16_t, SZ) \
> + RUN (int32_t, uint16_t, SZ) \
> + RUN (uint32_t, int16_t, SZ) \
> + RUN (uint32_t, uint16_t, SZ) \
> + RUN (int64_t, int32_t, SZ) \
> + RUN (int64_t, uint32_t, SZ) \
> + RUN (uint64_t, int32_t, SZ) \
> + RUN (uint64_t, uint32_t, SZ)
> +
> +int
> +main ()
> +{
> + RUN_ALL (15)
> + RUN_ALL (16)
> + RUN_ALL (17)
> + RUN_ALL (127)
> + RUN_ALL (128)
> + RUN_ALL (129)
> + RUN_ALL (512)
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> new file mode 100644
> index 00000000000..1630ba1a5f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> +
> +#include <assert.h>
> +#include "narrow-2.c"
> +
> +#define RUN(TYPE1, TYPE2, SZ) \
> + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> + } \
> + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> + a##TYPE1##_##TYPE2##_##SZ, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 7)); \
> + }
> +
> +#define RUN_ALL(SZ) \
> + RUN (int16_t, int8_t, SZ) \
> + RUN (int16_t, uint8_t, SZ) \
> + RUN (uint16_t, int8_t, SZ) \
> + RUN (uint16_t, uint8_t, SZ) \
> + RUN (int32_t, int16_t, SZ) \
> + RUN (int32_t, uint16_t, SZ) \
> + RUN (uint32_t, int16_t, SZ) \
> + RUN (uint32_t, uint16_t, SZ) \
> + RUN (int64_t, int32_t, SZ) \
> + RUN (int64_t, uint32_t, SZ) \
> + RUN (uint64_t, int32_t, SZ) \
> + RUN (uint64_t, uint32_t, SZ)
> +
> +int
> +main ()
> +{
> + RUN_ALL (15)
> + RUN_ALL (16)
> + RUN_ALL (17)
> + RUN_ALL (127)
> + RUN_ALL (128)
> + RUN_ALL (129)
> + RUN_ALL (512)
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> new file mode 100644
> index 00000000000..7638851e4fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> +
> +#include <assert.h>
> +#include "narrow-3.c"
> +
> +#define RUN(TYPE1, TYPE2, SZ) \
> + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> + } \
> + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> + a##TYPE1##_##TYPE2##_##SZ, 9, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 9)); \
> + }
> +
> +#define RUN_ALL(SZ) \
> + RUN (int16_t, int8_t, SZ) \
> + RUN (int16_t, uint8_t, SZ) \
> + RUN (uint16_t, int8_t, SZ) \
> + RUN (uint16_t, uint8_t, SZ) \
> + RUN (int32_t, int16_t, SZ) \
> + RUN (int32_t, uint16_t, SZ) \
> + RUN (uint32_t, int16_t, SZ) \
> + RUN (uint32_t, uint16_t, SZ) \
> + RUN (int64_t, int32_t, SZ) \
> + RUN (int64_t, uint32_t, SZ) \
> + RUN (uint64_t, int32_t, SZ) \
> + RUN (uint64_t, uint32_t, SZ)
> +
> +int
> +main ()
> +{
> + RUN_ALL (15)
> + RUN_ALL (16)
> + RUN_ALL (17)
> + RUN_ALL (127)
> + RUN_ALL (128)
> + RUN_ALL (129)
> + RUN_ALL (512)
> +}
> --
> 2.36.3
>
>
Do you mean changing all the split patterns to look like this?
;; This helps to match zero_extend + sign_extend + fma.
(define_insn_and_split "*zero_sign_extend_fma"
[(set (match_operand:VWEXTI 0 "register_operand")
(plus:VWEXTI
(mult:VWEXTI
(zero_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
(match_operand:VWEXTI 1 "register_operand")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
juzhe.zhong@rivai.ai
From: Kito Cheng
Date: 2023-06-12 20:37
To: juzhe.zhong@rivai.ai
CC: gcc-patches; Kito.cheng; palmer; palmer; jeffreyalaw; Robin Dapp
Subject: Re: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization
We have two styles of condition for those define_insn_and_split patterns:
"TARGET_VECTOR"/"&& can_create_pseudo_p ()" and "TARGET_VECTOR &&
can_create_pseudo_p ()"/"&& 1". Could you unify them all to the latter form? I
feel that would be safer, since those patterns are really only valid
before RA (can_create_pseudo_p () == true). They are mostly used by the
combine pass, so it is mostly safe today, but IMO we should fix this soon
rather than wait until we actually hit a problem later.
This patch is OK as it is; I would like to have a separate patch
to fix all those issues.
On Mon, Jun 12, 2023 at 8:27 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> Is this patch ok for trunk?
>
>
>
> juzhe.zhong@rivai.ai
>
> From: juzhe.zhong
> Date: 2023-06-12 10:41
> To: gcc-patches
> CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
> Subject: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> Optimize the following auto-vectorization codes:
> void foo (int16_t * __restrict a, int32_t * __restrict b, int32_t c, int n)
> {
> for (int i = 0; i < n; i++)
> a[i] = b[i] >> c;
> }
>
> Before this patch:
> foo:
> ble a3,zero,.L5
> .L3:
> vsetvli a5,a3,e32,m1,ta,ma
> vle32.v v1,0(a1)
> vsetvli a4,zero,e32,m1,ta,ma
> vsra.vx v1,v1,a2
> vsetvli zero,zero,e16,mf2,ta,ma
> slli a7,a5,2
> vncvt.x.x.w v1,v1
> slli a6,a5,1
> vsetvli zero,a5,e16,mf2,ta,ma
> sub a3,a3,a5
> vse16.v v1,0(a0)
> add a1,a1,a7
> add a0,a0,a6
> bne a3,zero,.L3
> .L5:
> ret
>
> After this patch:
> foo:
> ble a3,zero,.L5
> .L3:
> vsetvli a5,a3,e32,m1,ta,ma
> vle32.v v1,0(a1)
> vsetvli a7,zero,e16,mf2,ta,ma
> slli a6,a5,2
> vnsra.wx v1,v1,a2
> slli a4,a5,1
> vsetvli zero,a5,e16,mf2,ta,ma
> sub a3,a3,a5
> vse16.v v1,0(a0)
> add a1,a1,a6
> add a0,a0,a4
> bne a3,zero,.L3
> .L5:
> ret
>
> gcc/ChangeLog:
>
> * config/riscv/autovec-opt.md (*v<any_shiftrt:optab><any_extend:optab>trunc<mode>): New pattern.
> (*<any_shiftrt:optab>trunc<mode>): Ditto.
> * config/riscv/autovec.md (<optab><mode>3): Change to define_insn_and_split.
> (v<optab><mode>3): Ditto.
> (trunc<mode><v_double_trunc>2): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/binop/narrow-1.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow-2.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow-3.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c: New test.
> * gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c: New test.
>
> ---
> gcc/config/riscv/autovec-opt.md | 46 +++++++++++++++++
> gcc/config/riscv/autovec.md | 43 ++++++++++------
> .../riscv/rvv/autovec/binop/narrow-1.c | 31 ++++++++++++
> .../riscv/rvv/autovec/binop/narrow-2.c | 32 ++++++++++++
> .../riscv/rvv/autovec/binop/narrow-3.c | 31 ++++++++++++
> .../riscv/rvv/autovec/binop/narrow_run-1.c | 50 +++++++++++++++++++
> .../riscv/rvv/autovec/binop/narrow_run-2.c | 46 +++++++++++++++++
> .../riscv/rvv/autovec/binop/narrow_run-3.c | 46 +++++++++++++++++
> 8 files changed, 311 insertions(+), 14 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
>
> diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
> index 7bb93eed220..aef28e445e1 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -330,3 +330,49 @@
> }
> [(set_attr "type" "viwmuladd")
> (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> +
> +;; -------------------------------------------------------------------------
> +;; ---- [INT] Binary narrow shifts.
> +;; -------------------------------------------------------------------------
> +;; Includes:
> +;; - vnsrl.wv/vnsrl.wx/vnsrl.wi
> +;; - vnsra.wv/vnsra.wx/vnsra.wi
> +;; -------------------------------------------------------------------------
> +
> +(define_insn_and_split "*v<any_shiftrt:optab><any_extend:optab>trunc<mode>"
> + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr,vr")
> + (truncate:<V_DOUBLE_TRUNC>
> + (any_shiftrt:VWEXTI
> + (match_operand:VWEXTI 1 "register_operand" " vr,vr")
> + (any_extend:VWEXTI
> + (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" " vr,vk")))))]
> + "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> +{
> + insn_code icode = code_for_pred_narrow (<any_shiftrt:CODE>, <MODE>mode);
> + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
> + DONE;
> +}
> + [(set_attr "type" "vnshift")
> + (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> +
> +(define_insn_and_split "*<any_shiftrt:optab>trunc<mode>"
> + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
> + (truncate:<V_DOUBLE_TRUNC>
> + (any_shiftrt:VWEXTI
> + (match_operand:VWEXTI 1 "register_operand" " vr")
> + (match_operand:<VEL> 2 "csr_operand" " rK"))))]
> + "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> +{
> + operands[2] = gen_lowpart (Pmode, operands[2]);
> + insn_code icode = code_for_pred_narrow_scalar (<any_shiftrt:CODE>, <MODE>mode);
> + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
> + DONE;
> +}
> + [(set_attr "type" "vnshift")
> + (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index b7070099f29..eadc2c5b595 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -150,18 +150,23 @@
> ;; - vsll.vi/vsra.vi/vsrl.vi
> ;; -------------------------------------------------------------------------
> -(define_expand "<optab><mode>3"
> - [(set (match_operand:VI 0 "register_operand")
> +(define_insn_and_split "<optab><mode>3"
> + [(set (match_operand:VI 0 "register_operand" "=vr")
> (any_shift:VI
> - (match_operand:VI 1 "register_operand")
> - (match_operand:<VEL> 2 "csr_operand")))]
> + (match_operand:VI 1 "register_operand" " vr")
> + (match_operand:<VEL> 2 "csr_operand" " rK")))]
> "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> {
> operands[2] = gen_lowpart (Pmode, operands[2]);
> riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
> riscv_vector::RVV_BINOP, operands);
> DONE;
> -})
> +}
> + [(set_attr "type" "vshift")
> + (set_attr "mode" "<MODE>")])
> ;; -------------------------------------------------------------------------
> ;; ---- [INT] Binary shifts by scalar.
> @@ -170,17 +175,22 @@
> ;; - vsll.vv/vsra.vv/vsrl.vv
> ;; -------------------------------------------------------------------------
> -(define_expand "v<optab><mode>3"
> - [(set (match_operand:VI 0 "register_operand")
> +(define_insn_and_split "v<optab><mode>3"
> + [(set (match_operand:VI 0 "register_operand" "=vr,vr")
> (any_shift:VI
> - (match_operand:VI 1 "register_operand")
> - (match_operand:VI 2 "vector_shift_operand")))]
> + (match_operand:VI 1 "register_operand" " vr,vr")
> + (match_operand:VI 2 "vector_shift_operand" " vr,vk")))]
> "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> {
> riscv_vector::emit_vlmax_insn (code_for_pred (<CODE>, <MODE>mode),
> riscv_vector::RVV_BINOP, operands);
> DONE;
> -})
> +}
> + [(set_attr "type" "vshift")
> + (set_attr "mode" "<MODE>")])
> ;; -------------------------------------------------------------------------
> ;; ---- [BOOL] Binary logical operations
> @@ -395,16 +405,21 @@
> ;; -------------------------------------------------------------------------
> ;; - vncvt.x.x.w
> ;; -------------------------------------------------------------------------
> -(define_expand "trunc<mode><v_double_trunc>2"
> - [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
> +(define_insn_and_split "trunc<mode><v_double_trunc>2"
> + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
> (truncate:<V_DOUBLE_TRUNC>
> - (match_operand:VWEXTI 1 "register_operand")))]
> + (match_operand:VWEXTI 1 "register_operand" " vr")))]
> "TARGET_VECTOR"
> + "#"
> + "&& can_create_pseudo_p ()"
> + [(const_int 0)]
> {
> insn_code icode = code_for_pred_trunc (<MODE>mode);
> riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
> DONE;
> -})
> +}
> + [(set_attr "type" "vshift")
> + (set_attr "mode" "<MODE>")])
> ;; -------------------------------------------------------------------------
> ;; Truncation to a mode whose inner mode size is a quarter of mode's.
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> new file mode 100644
> index 00000000000..3de8d85b52d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE1, TYPE2) \
> + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
> + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 *__restrict b, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = a[i] >> b[i]; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (int16_t, int8_t) \
> + TEST_TYPE (int16_t, uint8_t) \
> + TEST_TYPE (uint16_t, int8_t) \
> + TEST_TYPE (uint16_t, uint8_t) \
> + TEST_TYPE (int32_t, int16_t) \
> + TEST_TYPE (int32_t, uint16_t) \
> + TEST_TYPE (uint32_t, int16_t) \
> + TEST_TYPE (uint32_t, uint16_t) \
> + TEST_TYPE (int64_t, int32_t) \
> + TEST_TYPE (int64_t, uint32_t) \
> + TEST_TYPE (uint64_t, int32_t) \
> + TEST_TYPE (uint64_t, uint32_t)
> +
> +TEST_ALL ()
> +
> +/* { dg-final { scan-assembler-times {\tvnsra\.wv} 6 } } */
> +/* { dg-final { scan-assembler-times {\tvnsrl\.wv} 5 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> new file mode 100644
> index 00000000000..e5c2e37f5fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE1, TYPE2) \
> + __attribute__ (( \
> + noipa)) void vnshift_##TYPE1##_##TYPE2 (TYPE2 *__restrict dst, \
> + TYPE1 *__restrict a, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = a[i] >> 7; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (int16_t, int8_t) \
> + TEST_TYPE (int16_t, uint8_t) \
> + TEST_TYPE (uint16_t, int8_t) \
> + TEST_TYPE (uint16_t, uint8_t) \
> + TEST_TYPE (int32_t, int16_t) \
> + TEST_TYPE (int32_t, uint16_t) \
> + TEST_TYPE (uint32_t, int16_t) \
> + TEST_TYPE (uint32_t, uint16_t) \
> + TEST_TYPE (int64_t, int32_t) \
> + TEST_TYPE (int64_t, uint32_t) \
> + TEST_TYPE (uint64_t, int32_t) \
> + TEST_TYPE (uint64_t, uint32_t)
> +
> +TEST_ALL ()
> +
> +/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */
> +/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> new file mode 100644
> index 00000000000..3b288466394
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE1, TYPE2) \
> + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
> + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 b, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = a[i] >> b; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (int16_t, int8_t) \
> + TEST_TYPE (int16_t, uint8_t) \
> + TEST_TYPE (uint16_t, int8_t) \
> + TEST_TYPE (uint16_t, uint8_t) \
> + TEST_TYPE (int32_t, int16_t) \
> + TEST_TYPE (int32_t, uint16_t) \
> + TEST_TYPE (uint32_t, int16_t) \
> + TEST_TYPE (uint32_t, uint16_t) \
> + TEST_TYPE (int64_t, int32_t) \
> + TEST_TYPE (int64_t, uint32_t) \
> + TEST_TYPE (uint64_t, int32_t) \
> + TEST_TYPE (uint64_t, uint32_t)
> +
> +TEST_ALL ()
> +
> +/* { dg-final { scan-assembler-times {\tvnsra\.wx} 4 } } */
> +/* { dg-final { scan-assembler-times {\tvnsrl\.wx} 4 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> new file mode 100644
> index 00000000000..2a898104fa8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> @@ -0,0 +1,50 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> +
> +#include <assert.h>
> +#include "narrow-1.c"
> +
> +#define RUN(TYPE1, TYPE2, SZ) \
> + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + TYPE2 b##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> + b##TYPE1##_##TYPE2##_##SZ[i] = i % (sizeof (TYPE2) * 3); \
> + } \
> + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> + a##TYPE1##_##TYPE2##_##SZ, \
> + b##TYPE1##_##TYPE2##_##SZ, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] \
> + >> b##TYPE1##_##TYPE2##_##SZ[i])); \
> + }
> +
> +#define RUN_ALL(SZ) \
> + RUN (int16_t, int8_t, SZ) \
> + RUN (int16_t, uint8_t, SZ) \
> + RUN (uint16_t, int8_t, SZ) \
> + RUN (uint16_t, uint8_t, SZ) \
> + RUN (int32_t, int16_t, SZ) \
> + RUN (int32_t, uint16_t, SZ) \
> + RUN (uint32_t, int16_t, SZ) \
> + RUN (uint32_t, uint16_t, SZ) \
> + RUN (int64_t, int32_t, SZ) \
> + RUN (int64_t, uint32_t, SZ) \
> + RUN (uint64_t, int32_t, SZ) \
> + RUN (uint64_t, uint32_t, SZ)
> +
> +int
> +main ()
> +{
> + RUN_ALL (15)
> + RUN_ALL (16)
> + RUN_ALL (17)
> + RUN_ALL (127)
> + RUN_ALL (128)
> + RUN_ALL (129)
> + RUN_ALL (512)
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> new file mode 100644
> index 00000000000..1630ba1a5f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> +
> +#include <assert.h>
> +#include "narrow-2.c"
> +
> +#define RUN(TYPE1, TYPE2, SZ) \
> + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> + } \
> + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> + a##TYPE1##_##TYPE2##_##SZ, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 7)); \
> + }
> +
> +#define RUN_ALL(SZ) \
> + RUN (int16_t, int8_t, SZ) \
> + RUN (int16_t, uint8_t, SZ) \
> + RUN (uint16_t, int8_t, SZ) \
> + RUN (uint16_t, uint8_t, SZ) \
> + RUN (int32_t, int16_t, SZ) \
> + RUN (int32_t, uint16_t, SZ) \
> + RUN (uint32_t, int16_t, SZ) \
> + RUN (uint32_t, uint16_t, SZ) \
> + RUN (int64_t, int32_t, SZ) \
> + RUN (int64_t, uint32_t, SZ) \
> + RUN (uint64_t, int32_t, SZ) \
> + RUN (uint64_t, uint32_t, SZ)
> +
> +int
> +main ()
> +{
> + RUN_ALL (15)
> + RUN_ALL (16)
> + RUN_ALL (17)
> + RUN_ALL (127)
> + RUN_ALL (128)
> + RUN_ALL (129)
> + RUN_ALL (512)
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> new file mode 100644
> index 00000000000..7638851e4fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> +
> +#include <assert.h>
> +#include "narrow-3.c"
> +
> +#define RUN(TYPE1, TYPE2, SZ) \
> + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> + } \
> + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> + a##TYPE1##_##TYPE2##_##SZ, 9, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 9)); \
> + }
> +
> +#define RUN_ALL(SZ) \
> + RUN (int16_t, int8_t, SZ) \
> + RUN (int16_t, uint8_t, SZ) \
> + RUN (uint16_t, int8_t, SZ) \
> + RUN (uint16_t, uint8_t, SZ) \
> + RUN (int32_t, int16_t, SZ) \
> + RUN (int32_t, uint16_t, SZ) \
> + RUN (uint32_t, int16_t, SZ) \
> + RUN (uint32_t, uint16_t, SZ) \
> + RUN (int64_t, int32_t, SZ) \
> + RUN (int64_t, uint32_t, SZ) \
> + RUN (uint64_t, int32_t, SZ) \
> + RUN (uint64_t, uint32_t, SZ)
> +
> +int
> +main ()
> +{
> + RUN_ALL (15)
> + RUN_ALL (16)
> + RUN_ALL (17)
> + RUN_ALL (127)
> + RUN_ALL (128)
> + RUN_ALL (129)
> + RUN_ALL (512)
> +}
> --
> 2.36.3
>
>
Yes, change all define_insn_and_split patterns to that style: "TARGET_VECTOR &&
can_create_pseudo_p ()"/"&& 1". My understanding is that all those
patterns should only match before RA, so using "TARGET_VECTOR &&
can_create_pseudo_p ()" as the insn condition for all of them is more reasonable.
On Mon, Jun 12, 2023 at 8:41 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> You mean change all split pattern like this ?
> ;; This helps to match zero_extend + sign_extend + fma.
> (define_insn_and_split "*zero_sign_extend_fma"
> [(set (match_operand:VWEXTI 0 "register_operand")
> (plus:VWEXTI
> (mult:VWEXTI
> (zero_extend:VWEXTI
> (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
> (sign_extend:VWEXTI
> (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
> (match_operand:VWEXTI 1 "register_operand")))]
> "TARGET_VECTOR && can_create_pseudo_p ()"
> "#"
> "&& 1"
> [(const_int 0)]
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-06-12 20:37
> To: juzhe.zhong@rivai.ai
> CC: gcc-patches; Kito.cheng; palmer; palmer; jeffreyalaw; Robin Dapp
> Subject: Re: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization
> We have two style predictor for those define_insn_and_split patterns,
> "TARGET_VECTOR"/"&& can_create_pseudo_p ()" and "TARGET_VECTOR &&
> can_create_pseudo_p ()"/"&& 1", could you unify all to later form? I
> feel that would be safer since those patterns are really only valid
> before RA(can_create_pseudo_p() == true), although it's mostly used by
> combine pass so it's mostly safe, but IMO we should fix this soon
> rather than fix that until we hit this later.
>
> OK for this patch as it is, and I would like to have a separated patch
> to fix all those issues.
>
> On Mon, Jun 12, 2023 at 8:27 PM juzhe.zhong@rivai.ai
> <juzhe.zhong@rivai.ai> wrote:
> >
> > Is this patch ok for trunk?
> >
> >
> >
> > juzhe.zhong@rivai.ai
> >
> > From: juzhe.zhong
> > Date: 2023-06-12 10:41
> > To: gcc-patches
> > CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
> > Subject: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization
> > From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
> >
> > Optimize the following auto-vectorization codes:
> > void foo (int16_t * __restrict a, int32_t * __restrict b, int32_t c, int n)
> > {
> > for (int i = 0; i < n; i++)
> > a[i] = b[i] >> c;
> > }
> >
> > Before this patch:
> > foo:
> > ble a3,zero,.L5
> > .L3:
> > vsetvli a5,a3,e32,m1,ta,ma
> > vle32.v v1,0(a1)
> > vsetvli a4,zero,e32,m1,ta,ma
> > vsra.vx v1,v1,a2
> > vsetvli zero,zero,e16,mf2,ta,ma
> > slli a7,a5,2
> > vncvt.x.x.w v1,v1
> > slli a6,a5,1
> > vsetvli zero,a5,e16,mf2,ta,ma
> > sub a3,a3,a5
> > vse16.v v1,0(a0)
> > add a1,a1,a7
> > add a0,a0,a6
> > bne a3,zero,.L3
> > .L5:
> > ret
> >
> > After this patch:
> > foo:
> > ble a3,zero,.L5
> > .L3:
> > vsetvli a5,a3,e32,m1,ta,ma
> > vle32.v v1,0(a1)
> > vsetvli a7,zero,e16,mf2,ta,ma
> > slli a6,a5,2
> > vnsra.wx v1,v1,a2
> > slli a4,a5,1
> > vsetvli zero,a5,e16,mf2,ta,ma
> > sub a3,a3,a5
> > vse16.v v1,0(a0)
> > add a1,a1,a6
> > add a0,a0,a4
> > bne a3,zero,.L3
> > .L5:
> > ret
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/autovec-opt.md (*v<any_shiftrt:optab><any_extend:optab>trunc<mode>): New pattern.
> > (*<any_shiftrt:optab>trunc<mode>): Ditto.
> > * config/riscv/autovec.md (<optab><mode>3): Change to define_insn_and_split.
> > (v<optab><mode>3): Ditto.
> > (trunc<mode><v_double_trunc>2): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/riscv/rvv/autovec/binop/narrow-1.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow-2.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow-3.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c: New test.
> >
> > ---
> > gcc/config/riscv/autovec-opt.md | 46 +++++++++++++++++
> > gcc/config/riscv/autovec.md | 43 ++++++++++------
> > .../riscv/rvv/autovec/binop/narrow-1.c | 31 ++++++++++++
> > .../riscv/rvv/autovec/binop/narrow-2.c | 32 ++++++++++++
> > .../riscv/rvv/autovec/binop/narrow-3.c | 31 ++++++++++++
> > .../riscv/rvv/autovec/binop/narrow_run-1.c | 50 +++++++++++++++++++
> > .../riscv/rvv/autovec/binop/narrow_run-2.c | 46 +++++++++++++++++
> > .../riscv/rvv/autovec/binop/narrow_run-3.c | 46 +++++++++++++++++
> > 8 files changed, 311 insertions(+), 14 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> >
> > diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
> > index 7bb93eed220..aef28e445e1 100644
> > --- a/gcc/config/riscv/autovec-opt.md
> > +++ b/gcc/config/riscv/autovec-opt.md
> > @@ -330,3 +330,49 @@
> > }
> > [(set_attr "type" "viwmuladd")
> > (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> > +
> > +;; -------------------------------------------------------------------------
> > +;; ---- [INT] Binary narrow shifts.
> > +;; -------------------------------------------------------------------------
> > +;; Includes:
> > +;; - vnsrl.wv/vnsrl.wx/vnsrl.wi
> > +;; - vnsra.wv/vnsra.wx/vnsra.wi
> > +;; -------------------------------------------------------------------------
> > +
> > +(define_insn_and_split "*v<any_shiftrt:optab><any_extend:optab>trunc<mode>"
> > + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr,vr")
> > + (truncate:<V_DOUBLE_TRUNC>
> > + (any_shiftrt:VWEXTI
> > + (match_operand:VWEXTI 1 "register_operand" " vr,vr")
> > + (any_extend:VWEXTI
> > + (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" " vr,vk")))))]
> > + "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > +{
> > + insn_code icode = code_for_pred_narrow (<any_shiftrt:CODE>, <MODE>mode);
> > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
> > + DONE;
> > +}
> > + [(set_attr "type" "vnshift")
> > + (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> > +
> > +(define_insn_and_split "*<any_shiftrt:optab>trunc<mode>"
> > + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
> > + (truncate:<V_DOUBLE_TRUNC>
> > + (any_shiftrt:VWEXTI
> > + (match_operand:VWEXTI 1 "register_operand" " vr")
> > + (match_operand:<VEL> 2 "csr_operand" " rK"))))]
> > + "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > +{
> > + operands[2] = gen_lowpart (Pmode, operands[2]);
> > + insn_code icode = code_for_pred_narrow_scalar (<any_shiftrt:CODE>, <MODE>mode);
> > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
> > + DONE;
> > +}
> > + [(set_attr "type" "vnshift")
> > + (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> > diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> > index b7070099f29..eadc2c5b595 100644
> > --- a/gcc/config/riscv/autovec.md
> > +++ b/gcc/config/riscv/autovec.md
> > @@ -150,18 +150,23 @@
> > ;; - vsll.vi/vsra.vi/vsrl.vi
> > ;; -------------------------------------------------------------------------
> > -(define_expand "<optab><mode>3"
> > - [(set (match_operand:VI 0 "register_operand")
> > +(define_insn_and_split "<optab><mode>3"
> > + [(set (match_operand:VI 0 "register_operand" "=vr")
> > (any_shift:VI
> > - (match_operand:VI 1 "register_operand")
> > - (match_operand:<VEL> 2 "csr_operand")))]
> > + (match_operand:VI 1 "register_operand" " vr")
> > + (match_operand:<VEL> 2 "csr_operand" " rK")))]
> > "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > {
> > operands[2] = gen_lowpart (Pmode, operands[2]);
> > riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
> > riscv_vector::RVV_BINOP, operands);
> > DONE;
> > -})
> > +}
> > + [(set_attr "type" "vshift")
> > + (set_attr "mode" "<MODE>")])
> > ;; -------------------------------------------------------------------------
> > ;; ---- [INT] Binary shifts by scalar.
> > @@ -170,17 +175,22 @@
> > ;; - vsll.vv/vsra.vv/vsrl.vv
> > ;; -------------------------------------------------------------------------
> > -(define_expand "v<optab><mode>3"
> > - [(set (match_operand:VI 0 "register_operand")
> > +(define_insn_and_split "v<optab><mode>3"
> > + [(set (match_operand:VI 0 "register_operand" "=vr,vr")
> > (any_shift:VI
> > - (match_operand:VI 1 "register_operand")
> > - (match_operand:VI 2 "vector_shift_operand")))]
> > + (match_operand:VI 1 "register_operand" " vr,vr")
> > + (match_operand:VI 2 "vector_shift_operand" " vr,vk")))]
> > "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > {
> > riscv_vector::emit_vlmax_insn (code_for_pred (<CODE>, <MODE>mode),
> > riscv_vector::RVV_BINOP, operands);
> > DONE;
> > -})
> > +}
> > + [(set_attr "type" "vshift")
> > + (set_attr "mode" "<MODE>")])
> > ;; -------------------------------------------------------------------------
> > ;; ---- [BOOL] Binary logical operations
> > @@ -395,16 +405,21 @@
> > ;; -------------------------------------------------------------------------
> > ;; - vncvt.x.x.w
> > ;; -------------------------------------------------------------------------
> > -(define_expand "trunc<mode><v_double_trunc>2"
> > - [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
> > +(define_insn_and_split "trunc<mode><v_double_trunc>2"
> > + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
> > (truncate:<V_DOUBLE_TRUNC>
> > - (match_operand:VWEXTI 1 "register_operand")))]
> > + (match_operand:VWEXTI 1 "register_operand" " vr")))]
> > "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > {
> > insn_code icode = code_for_pred_trunc (<MODE>mode);
> > riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
> > DONE;
> > -})
> > +}
> > + [(set_attr "type" "vshift")
> > + (set_attr "mode" "<MODE>")])
> > ;; -------------------------------------------------------------------------
> > ;; Truncation to a mode whose inner mode size is a quarter of mode's.
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> > new file mode 100644
> > index 00000000000..3de8d85b52d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <stdint-gcc.h>
> > +
> > +#define TEST_TYPE(TYPE1, TYPE2) \
> > + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
> > + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 *__restrict b, int n) \
> > + { \
> > + for (int i = 0; i < n; i++) \
> > + dst[i] = a[i] >> b[i]; \
> > + }
> > +
> > +#define TEST_ALL() \
> > + TEST_TYPE (int16_t, int8_t) \
> > + TEST_TYPE (int16_t, uint8_t) \
> > + TEST_TYPE (uint16_t, int8_t) \
> > + TEST_TYPE (uint16_t, uint8_t) \
> > + TEST_TYPE (int32_t, int16_t) \
> > + TEST_TYPE (int32_t, uint16_t) \
> > + TEST_TYPE (uint32_t, int16_t) \
> > + TEST_TYPE (uint32_t, uint16_t) \
> > + TEST_TYPE (int64_t, int32_t) \
> > + TEST_TYPE (int64_t, uint32_t) \
> > + TEST_TYPE (uint64_t, int32_t) \
> > + TEST_TYPE (uint64_t, uint32_t)
> > +
> > +TEST_ALL ()
> > +
> > +/* { dg-final { scan-assembler-times {\tvnsra\.wv} 6 } } */
> > +/* { dg-final { scan-assembler-times {\tvnsrl\.wv} 5 } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> > new file mode 100644
> > index 00000000000..e5c2e37f5fa
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <stdint-gcc.h>
> > +
> > +#define TEST_TYPE(TYPE1, TYPE2) \
> > + __attribute__ (( \
> > + noipa)) void vnshift_##TYPE1##_##TYPE2 (TYPE2 *__restrict dst, \
> > + TYPE1 *__restrict a, int n) \
> > + { \
> > + for (int i = 0; i < n; i++) \
> > + dst[i] = a[i] >> 7; \
> > + }
> > +
> > +#define TEST_ALL() \
> > + TEST_TYPE (int16_t, int8_t) \
> > + TEST_TYPE (int16_t, uint8_t) \
> > + TEST_TYPE (uint16_t, int8_t) \
> > + TEST_TYPE (uint16_t, uint8_t) \
> > + TEST_TYPE (int32_t, int16_t) \
> > + TEST_TYPE (int32_t, uint16_t) \
> > + TEST_TYPE (uint32_t, int16_t) \
> > + TEST_TYPE (uint32_t, uint16_t) \
> > + TEST_TYPE (int64_t, int32_t) \
> > + TEST_TYPE (int64_t, uint32_t) \
> > + TEST_TYPE (uint64_t, int32_t) \
> > + TEST_TYPE (uint64_t, uint32_t)
> > +
> > +TEST_ALL ()
> > +
> > +/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */
> > +/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> > new file mode 100644
> > index 00000000000..3b288466394
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <stdint-gcc.h>
> > +
> > +#define TEST_TYPE(TYPE1, TYPE2) \
> > + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
> > + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 b, int n) \
> > + { \
> > + for (int i = 0; i < n; i++) \
> > + dst[i] = a[i] >> b; \
> > + }
> > +
> > +#define TEST_ALL() \
> > + TEST_TYPE (int16_t, int8_t) \
> > + TEST_TYPE (int16_t, uint8_t) \
> > + TEST_TYPE (uint16_t, int8_t) \
> > + TEST_TYPE (uint16_t, uint8_t) \
> > + TEST_TYPE (int32_t, int16_t) \
> > + TEST_TYPE (int32_t, uint16_t) \
> > + TEST_TYPE (uint32_t, int16_t) \
> > + TEST_TYPE (uint32_t, uint16_t) \
> > + TEST_TYPE (int64_t, int32_t) \
> > + TEST_TYPE (int64_t, uint32_t) \
> > + TEST_TYPE (uint64_t, int32_t) \
> > + TEST_TYPE (uint64_t, uint32_t)
> > +
> > +TEST_ALL ()
> > +
> > +/* { dg-final { scan-assembler-times {\tvnsra\.wx} 4 } } */
> > +/* { dg-final { scan-assembler-times {\tvnsrl\.wx} 4 } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> > new file mode 100644
> > index 00000000000..2a898104fa8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> > @@ -0,0 +1,50 @@
> > +/* { dg-do run { target { riscv_vector } } } */
> > +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <assert.h>
> > +#include "narrow-1.c"
> > +
> > +#define RUN(TYPE1, TYPE2, SZ) \
> > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + TYPE2 b##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> > + b##TYPE1##_##TYPE2##_##SZ[i] = i % (sizeof (TYPE2) * 3); \
> > + } \
> > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> > + a##TYPE1##_##TYPE2##_##SZ, \
> > + b##TYPE1##_##TYPE2##_##SZ, SZ); \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] \
> > + >> b##TYPE1##_##TYPE2##_##SZ[i])); \
> > + }
> > +
> > +#define RUN_ALL(SZ) \
> > + RUN (int16_t, int8_t, SZ) \
> > + RUN (int16_t, uint8_t, SZ) \
> > + RUN (uint16_t, int8_t, SZ) \
> > + RUN (uint16_t, uint8_t, SZ) \
> > + RUN (int32_t, int16_t, SZ) \
> > + RUN (int32_t, uint16_t, SZ) \
> > + RUN (uint32_t, int16_t, SZ) \
> > + RUN (uint32_t, uint16_t, SZ) \
> > + RUN (int64_t, int32_t, SZ) \
> > + RUN (int64_t, uint32_t, SZ) \
> > + RUN (uint64_t, int32_t, SZ) \
> > + RUN (uint64_t, uint32_t, SZ)
> > +
> > +int
> > +main ()
> > +{
> > + RUN_ALL (15)
> > + RUN_ALL (16)
> > + RUN_ALL (17)
> > + RUN_ALL (127)
> > + RUN_ALL (128)
> > + RUN_ALL (129)
> > + RUN_ALL (512)
> > +}
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> > new file mode 100644
> > index 00000000000..1630ba1a5f8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> > @@ -0,0 +1,46 @@
> > +/* { dg-do run { target { riscv_vector } } } */
> > +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <assert.h>
> > +#include "narrow-2.c"
> > +
> > +#define RUN(TYPE1, TYPE2, SZ) \
> > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> > + } \
> > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> > + a##TYPE1##_##TYPE2##_##SZ, SZ); \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 7)); \
> > + }
> > +
> > +#define RUN_ALL(SZ) \
> > + RUN (int16_t, int8_t, SZ) \
> > + RUN (int16_t, uint8_t, SZ) \
> > + RUN (uint16_t, int8_t, SZ) \
> > + RUN (uint16_t, uint8_t, SZ) \
> > + RUN (int32_t, int16_t, SZ) \
> > + RUN (int32_t, uint16_t, SZ) \
> > + RUN (uint32_t, int16_t, SZ) \
> > + RUN (uint32_t, uint16_t, SZ) \
> > + RUN (int64_t, int32_t, SZ) \
> > + RUN (int64_t, uint32_t, SZ) \
> > + RUN (uint64_t, int32_t, SZ) \
> > + RUN (uint64_t, uint32_t, SZ)
> > +
> > +int
> > +main ()
> > +{
> > + RUN_ALL (15)
> > + RUN_ALL (16)
> > + RUN_ALL (17)
> > + RUN_ALL (127)
> > + RUN_ALL (128)
> > + RUN_ALL (129)
> > + RUN_ALL (512)
> > +}
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> > new file mode 100644
> > index 00000000000..7638851e4fa
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> > @@ -0,0 +1,46 @@
> > +/* { dg-do run { target { riscv_vector } } } */
> > +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <assert.h>
> > +#include "narrow-3.c"
> > +
> > +#define RUN(TYPE1, TYPE2, SZ) \
> > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> > + } \
> > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> > + a##TYPE1##_##TYPE2##_##SZ, 9, SZ); \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 9)); \
> > + }
> > +
> > +#define RUN_ALL(SZ) \
> > + RUN (int16_t, int8_t, SZ) \
> > + RUN (int16_t, uint8_t, SZ) \
> > + RUN (uint16_t, int8_t, SZ) \
> > + RUN (uint16_t, uint8_t, SZ) \
> > + RUN (int32_t, int16_t, SZ) \
> > + RUN (int32_t, uint16_t, SZ) \
> > + RUN (uint32_t, int16_t, SZ) \
> > + RUN (uint32_t, uint16_t, SZ) \
> > + RUN (int64_t, int32_t, SZ) \
> > + RUN (int64_t, uint32_t, SZ) \
> > + RUN (uint64_t, int32_t, SZ) \
> > + RUN (uint64_t, uint32_t, SZ)
> > +
> > +int
> > +main ()
> > +{
> > + RUN_ALL (15)
> > + RUN_ALL (16)
> > + RUN_ALL (17)
> > + RUN_ALL (127)
> > + RUN_ALL (128)
> > + RUN_ALL (129)
> > + RUN_ALL (512)
> > +}
> > --
> > 2.36.3
> >
> >
>
Committed, thanks Kito. I will take care of the define_insn_and_split part in a separate patch.
Pan
-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Kito Cheng via Gcc-patches
Sent: Monday, June 12, 2023 8:45 PM
To: juzhe.zhong@rivai.ai
Cc: kito.cheng <kito.cheng@gmail.com>; gcc-patches <gcc-patches@gcc.gnu.org>; palmer <palmer@dabbelt.com>; palmer <palmer@rivosinc.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>
Subject: Re: Re: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization
Yes, change all define_insn_and_split patterns to that style: "TARGET_VECTOR && can_create_pseudo_p ()" / "&& 1". My understanding is that all those patterns should only work before RA, so using "TARGET_VECTOR && can_create_pseudo_p ()" for all of them is more reasonable.
On Mon, Jun 12, 2023 at 8:41 PM juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> wrote:
>
> Do you mean changing all split patterns like this?
> ;; This helps to match zero_extend + sign_extend + fma.
> (define_insn_and_split "*zero_sign_extend_fma"
> [(set (match_operand:VWEXTI 0 "register_operand")
> (plus:VWEXTI
> (mult:VWEXTI
> (zero_extend:VWEXTI
> (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
> (sign_extend:VWEXTI
> (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
> (match_operand:VWEXTI 1 "register_operand")))]
> "TARGET_VECTOR && can_create_pseudo_p ()"
> "#"
> "&& 1"
> [(const_int 0)]
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-06-12 20:37
> To: juzhe.zhong@rivai.ai
> CC: gcc-patches; Kito.cheng; palmer; palmer; jeffreyalaw; Robin Dapp
> Subject: Re: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization
>
> We have two styles of condition for those define_insn_and_split
> patterns: "TARGET_VECTOR" / "&& can_create_pseudo_p ()" and
> "TARGET_VECTOR && can_create_pseudo_p ()" / "&& 1". Could you unify
> them all to the latter form? I feel that would be safer, since those
> patterns are really only valid before RA (can_create_pseudo_p () == true).
> They are mostly used by the combine pass, so it is mostly safe today,
> but IMO we should fix this soon rather than wait until we hit a problem later.
>
> OK for this patch as it is, and I would like to have a separate patch
> to fix all those issues.
>
> On Mon, Jun 12, 2023 at 8:27 PM juzhe.zhong@rivai.ai
> <juzhe.zhong@rivai.ai> wrote:
> >
> > Is this patch ok for trunk?
> >
> >
> >
> > juzhe.zhong@rivai.ai
> >
> > From: juzhe.zhong
> > Date: 2023-06-12 10:41
> > To: gcc-patches
> > CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc;
> > Juzhe-Zhong
> > Subject: [PATCH] RISC-V: Add RVV narrow shift right lowering
> > auto-vectorization
> > From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
> >
> > Optimize the following auto-vectorization codes:
> > void foo (int16_t * __restrict a, int32_t * __restrict b, int32_t c,
> > int n) {
> > for (int i = 0; i < n; i++)
> > a[i] = b[i] >> c;
> > }
> >
> > Before this patch:
> > foo:
> > ble a3,zero,.L5
> > .L3:
> > vsetvli a5,a3,e32,m1,ta,ma
> > vle32.v v1,0(a1)
> > vsetvli a4,zero,e32,m1,ta,ma
> > vsra.vx v1,v1,a2
> > vsetvli zero,zero,e16,mf2,ta,ma
> > slli a7,a5,2
> > vncvt.x.x.w v1,v1
> > slli a6,a5,1
> > vsetvli zero,a5,e16,mf2,ta,ma
> > sub a3,a3,a5
> > vse16.v v1,0(a0)
> > add a1,a1,a7
> > add a0,a0,a6
> > bne a3,zero,.L3
> > .L5:
> > ret
> >
> > After this patch:
> > foo:
> > ble a3,zero,.L5
> > .L3:
> > vsetvli a5,a3,e32,m1,ta,ma
> > vle32.v v1,0(a1)
> > vsetvli a7,zero,e16,mf2,ta,ma
> > slli a6,a5,2
> > vnsra.wx v1,v1,a2
> > slli a4,a5,1
> > vsetvli zero,a5,e16,mf2,ta,ma
> > sub a3,a3,a5
> > vse16.v v1,0(a0)
> > add a1,a1,a6
> > add a0,a0,a4
> > bne a3,zero,.L3
> > .L5:
> > ret
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/autovec-opt.md (*v<any_shiftrt:optab><any_extend:optab>trunc<mode>): New pattern.
> > (*<any_shiftrt:optab>trunc<mode>): Ditto.
> > * config/riscv/autovec.md (<optab><mode>3): Change to define_insn_and_split.
> > (v<optab><mode>3): Ditto.
> > (trunc<mode><v_double_trunc>2): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/riscv/rvv/autovec/binop/narrow-1.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow-2.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow-3.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c: New test.
> > * gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c: New test.
> >
> > ---
> > gcc/config/riscv/autovec-opt.md | 46 +++++++++++++++++
> > gcc/config/riscv/autovec.md | 43 ++++++++++------
> > .../riscv/rvv/autovec/binop/narrow-1.c | 31 ++++++++++++
> > .../riscv/rvv/autovec/binop/narrow-2.c | 32 ++++++++++++
> > .../riscv/rvv/autovec/binop/narrow-3.c | 31 ++++++++++++
> > .../riscv/rvv/autovec/binop/narrow_run-1.c | 50 +++++++++++++++++++
> > .../riscv/rvv/autovec/binop/narrow_run-2.c | 46 +++++++++++++++++
> > .../riscv/rvv/autovec/binop/narrow_run-3.c | 46 +++++++++++++++++
> > 8 files changed, 311 insertions(+), 14 deletions(-) create mode
> > 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> > create mode 100644
> > gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> > create mode 100644
> > gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> > create mode 100644
> > gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> > create mode 100644
> > gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> > create mode 100644
> > gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> >
> > diff --git a/gcc/config/riscv/autovec-opt.md
> > b/gcc/config/riscv/autovec-opt.md index 7bb93eed220..aef28e445e1
> > 100644
> > --- a/gcc/config/riscv/autovec-opt.md
> > +++ b/gcc/config/riscv/autovec-opt.md
> > @@ -330,3 +330,49 @@
> > }
> > [(set_attr "type" "viwmuladd")
> > (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> > +
> > +;; -------------------------------------------------------------------------
> > +;; ---- [INT] Binary narrow shifts.
> > +;; -------------------------------------------------------------------------
> > +;; Includes:
> > +;; - vnsrl.wv/vnsrl.wx/vnsrl.wi
> > +;; - vnsra.wv/vnsra.wx/vnsra.wi
> > +;; -------------------------------------------------------------------------
> > +
> > +(define_insn_and_split "*v<any_shiftrt:optab><any_extend:optab>trunc<mode>"
> > + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr,vr")
> > + (truncate:<V_DOUBLE_TRUNC>
> > + (any_shiftrt:VWEXTI
> > + (match_operand:VWEXTI 1 "register_operand" " vr,vr")
> > + (any_extend:VWEXTI
> > + (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand"
> > +" vr,vk")))))]
> > + "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > +{
> > + insn_code icode = code_for_pred_narrow (<any_shiftrt:CODE>,
> > +<MODE>mode);
> > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP,
> > +operands);
> > + DONE;
> > +}
> > + [(set_attr "type" "vnshift")
> > + (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> > +
> > +(define_insn_and_split "*<any_shiftrt:optab>trunc<mode>"
> > + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
> > + (truncate:<V_DOUBLE_TRUNC>
> > + (any_shiftrt:VWEXTI
> > + (match_operand:VWEXTI 1 "register_operand" " vr")
> > + (match_operand:<VEL> 2 "csr_operand" " rK"))))]
> > + "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > +{
> > + operands[2] = gen_lowpart (Pmode, operands[2]);
> > + insn_code icode = code_for_pred_narrow_scalar
> > +(<any_shiftrt:CODE>, <MODE>mode);
> > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP,
> > +operands);
> > + DONE;
> > +}
> > + [(set_attr "type" "vnshift")
> > + (set_attr "mode" "<V_DOUBLE_TRUNC>")])
> > diff --git a/gcc/config/riscv/autovec.md
> > b/gcc/config/riscv/autovec.md index b7070099f29..eadc2c5b595 100644
> > --- a/gcc/config/riscv/autovec.md
> > +++ b/gcc/config/riscv/autovec.md
> > @@ -150,18 +150,23 @@
> > ;; - vsll.vi/vsra.vi/vsrl.vi
> > ;;
> > --------------------------------------------------------------------
> > -----
> > -(define_expand "<optab><mode>3"
> > - [(set (match_operand:VI 0 "register_operand")
> > +(define_insn_and_split "<optab><mode>3"
> > + [(set (match_operand:VI 0 "register_operand" "=vr")
> > (any_shift:VI
> > - (match_operand:VI 1 "register_operand")
> > - (match_operand:<VEL> 2 "csr_operand")))]
> > + (match_operand:VI 1 "register_operand" " vr")
> > + (match_operand:<VEL> 2 "csr_operand" " rK")))]
> > "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > {
> > operands[2] = gen_lowpart (Pmode, operands[2]);
> > riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>,
> > <MODE>mode), riscv_vector::RVV_BINOP, operands);
> > DONE;
> > -})
> > +}
> > + [(set_attr "type" "vshift")
> > + (set_attr "mode" "<MODE>")])
> > ;;
> > --------------------------------------------------------------------
> > ----- ;; ---- [INT] Binary shifts by scalar.
> > @@ -170,17 +175,22 @@
> > ;; - vsll.vv/vsra.vv/vsrl.vv
> > ;;
> > --------------------------------------------------------------------
> > -----
> > -(define_expand "v<optab><mode>3"
> > - [(set (match_operand:VI 0 "register_operand")
> > +(define_insn_and_split "v<optab><mode>3"
> > + [(set (match_operand:VI 0 "register_operand" "=vr,vr")
> > (any_shift:VI
> > - (match_operand:VI 1 "register_operand")
> > - (match_operand:VI 2 "vector_shift_operand")))]
> > + (match_operand:VI 1 "register_operand" " vr,vr")
> > + (match_operand:VI 2 "vector_shift_operand" " vr,vk")))]
> > "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > {
> > riscv_vector::emit_vlmax_insn (code_for_pred (<CODE>,
> > <MODE>mode), riscv_vector::RVV_BINOP, operands);
> > DONE;
> > -})
> > +}
> > + [(set_attr "type" "vshift")
> > + (set_attr "mode" "<MODE>")])
> > ;;
> > --------------------------------------------------------------------
> > ----- ;; ---- [BOOL] Binary logical operations @@ -395,16 +405,21 @@
> > ;;
> > --------------------------------------------------------------------
> > -----
> > ;; - vncvt.x.x.w
> > ;;
> > --------------------------------------------------------------------
> > ----- -(define_expand "trunc<mode><v_double_trunc>2"
> > - [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
> > +(define_insn_and_split "trunc<mode><v_double_trunc>2"
> > + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
> > (truncate:<V_DOUBLE_TRUNC>
> > - (match_operand:VWEXTI 1 "register_operand")))]
> > + (match_operand:VWEXTI 1 "register_operand" " vr")))]
> > "TARGET_VECTOR"
> > + "#"
> > + "&& can_create_pseudo_p ()"
> > + [(const_int 0)]
> > {
> > insn_code icode = code_for_pred_trunc (<MODE>mode);
> > riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
> > DONE;
> > -})
> > +}
> > + [(set_attr "type" "vshift")
> > + (set_attr "mode" "<MODE>")])
> > ;;
> > --------------------------------------------------------------------
> > ----- ;; Truncation to a mode whose inner mode size is a quarter of
> > mode's.
> > diff --git
> > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> > new file mode 100644
> > index 00000000000..3de8d85b52d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d
> > +--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <stdint-gcc.h>
> > +
> > +#define TEST_TYPE(TYPE1, TYPE2) \
> > + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
> > + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 *__restrict b, int n) \
> > + { \
> > + for (int i = 0; i < n; i++) \
> > + dst[i] = a[i] >> b[i]; \
> > + }
> > +
> > +#define TEST_ALL() \
> > + TEST_TYPE (int16_t, int8_t) \
> > + TEST_TYPE (int16_t, uint8_t) \
> > + TEST_TYPE (uint16_t, int8_t) \
> > + TEST_TYPE (uint16_t, uint8_t) \
> > + TEST_TYPE (int32_t, int16_t) \
> > + TEST_TYPE (int32_t, uint16_t) \
> > + TEST_TYPE (uint32_t, int16_t) \
> > + TEST_TYPE (uint32_t, uint16_t) \
> > + TEST_TYPE (int64_t, int32_t) \
> > + TEST_TYPE (int64_t, uint32_t) \
> > + TEST_TYPE (uint64_t, int32_t) \
> > + TEST_TYPE (uint64_t, uint32_t)
> > +
> > +TEST_ALL ()
> > +
> > +/* { dg-final { scan-assembler-times {\tvnsra\.wv} 6 } } */
> > +/* { dg-final { scan-assembler-times {\tvnsrl\.wv} 5 } } */
> > diff --git
> > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> > new file mode 100644
> > index 00000000000..e5c2e37f5fa
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d
> > +--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <stdint-gcc.h>
> > +
> > +#define TEST_TYPE(TYPE1, TYPE2) \
> > + __attribute__ (( \
> > + noipa)) void vnshift_##TYPE1##_##TYPE2 (TYPE2 *__restrict dst, \
> > + TYPE1 *__restrict a, int n) \
> > + { \
> > + for (int i = 0; i < n; i++) \
> > + dst[i] = a[i] >> 7; \
> > + }
> > +
> > +#define TEST_ALL() \
> > + TEST_TYPE (int16_t, int8_t) \
> > + TEST_TYPE (int16_t, uint8_t) \
> > + TEST_TYPE (uint16_t, int8_t) \
> > + TEST_TYPE (uint16_t, uint8_t) \
> > + TEST_TYPE (int32_t, int16_t) \
> > + TEST_TYPE (int32_t, uint16_t) \
> > + TEST_TYPE (uint32_t, int16_t) \
> > + TEST_TYPE (uint32_t, uint16_t) \
> > + TEST_TYPE (int64_t, int32_t) \
> > + TEST_TYPE (int64_t, uint32_t) \
> > + TEST_TYPE (uint64_t, int32_t) \
> > + TEST_TYPE (uint64_t, uint32_t)
> > +
> > +TEST_ALL ()
> > +
> > +/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */
> > +/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */
> > diff --git
> > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> > new file mode 100644
> > index 00000000000..3b288466394
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d
> > +--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <stdint-gcc.h>
> > +
> > +#define TEST_TYPE(TYPE1, TYPE2) \
> > + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
> > + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 b, int n) \
> > + { \
> > + for (int i = 0; i < n; i++) \
> > + dst[i] = a[i] >> b; \
> > + }
> > +
> > +#define TEST_ALL() \
> > + TEST_TYPE (int16_t, int8_t) \
> > + TEST_TYPE (int16_t, uint8_t) \
> > + TEST_TYPE (uint16_t, int8_t) \
> > + TEST_TYPE (uint16_t, uint8_t) \
> > + TEST_TYPE (int32_t, int16_t) \
> > + TEST_TYPE (int32_t, uint16_t) \
> > + TEST_TYPE (uint32_t, int16_t) \
> > + TEST_TYPE (uint32_t, uint16_t) \
> > + TEST_TYPE (int64_t, int32_t) \
> > + TEST_TYPE (int64_t, uint32_t) \
> > + TEST_TYPE (uint64_t, int32_t) \
> > + TEST_TYPE (uint64_t, uint32_t)
> > +
> > +TEST_ALL ()
> > +
> > +/* { dg-final { scan-assembler-times {\tvnsra\.wx} 4 } } */
> > +/* { dg-final { scan-assembler-times {\tvnsrl\.wx} 4 } } */
> > diff --git
> > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c
> > new file mode 100644
> > index 00000000000..2a898104fa8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.
> > +++ c
> > @@ -0,0 +1,50 @@
> > +/* { dg-do run { target { riscv_vector } } } */
> > +/* { dg-additional-options
> > +"--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <assert.h>
> > +#include "narrow-1.c"
> > +
> > +#define RUN(TYPE1, TYPE2, SZ) \
> > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + TYPE2 b##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> > + b##TYPE1##_##TYPE2##_##SZ[i] = i % (sizeof (TYPE2) * 3); \
> > + } \
> > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> > + a##TYPE1##_##TYPE2##_##SZ, \
> > + b##TYPE1##_##TYPE2##_##SZ, SZ); \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] \
> > + >> b##TYPE1##_##TYPE2##_##SZ[i])); \
> > + }
> > +
> > +#define RUN_ALL(SZ) \
> > + RUN (int16_t, int8_t, SZ) \
> > + RUN (int16_t, uint8_t, SZ) \
> > + RUN (uint16_t, int8_t, SZ) \
> > + RUN (uint16_t, uint8_t, SZ) \
> > + RUN (int32_t, int16_t, SZ) \
> > + RUN (int32_t, uint16_t, SZ) \
> > + RUN (uint32_t, int16_t, SZ) \
> > + RUN (uint32_t, uint16_t, SZ) \
> > + RUN (int64_t, int32_t, SZ) \
> > + RUN (int64_t, uint32_t, SZ) \
> > + RUN (uint64_t, int32_t, SZ) \
> > + RUN (uint64_t, uint32_t, SZ)
> > +
> > +int
> > +main ()
> > +{
> > + RUN_ALL (15)
> > + RUN_ALL (16)
> > + RUN_ALL (17)
> > + RUN_ALL (127)
> > + RUN_ALL (128)
> > + RUN_ALL (129)
> > + RUN_ALL (512)
> > +}
> > diff --git
> > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c
> > new file mode 100644
> > index 00000000000..1630ba1a5f8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.
> > +++ c
> > @@ -0,0 +1,46 @@
> > +/* { dg-do run { target { riscv_vector } } } */
> > +/* { dg-additional-options
> > +"--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <assert.h>
> > +#include "narrow-2.c"
> > +
> > +#define RUN(TYPE1, TYPE2, SZ) \
> > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> > + } \
> > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> > + a##TYPE1##_##TYPE2##_##SZ, SZ); \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 7)); \
> > + }
> > +
> > +#define RUN_ALL(SZ) \
> > + RUN (int16_t, int8_t, SZ) \
> > + RUN (int16_t, uint8_t, SZ) \
> > + RUN (uint16_t, int8_t, SZ) \
> > + RUN (uint16_t, uint8_t, SZ) \
> > + RUN (int32_t, int16_t, SZ) \
> > + RUN (int32_t, uint16_t, SZ) \
> > + RUN (uint32_t, int16_t, SZ) \
> > + RUN (uint32_t, uint16_t, SZ) \
> > + RUN (int64_t, int32_t, SZ) \
> > + RUN (int64_t, uint32_t, SZ) \
> > + RUN (uint64_t, int32_t, SZ) \
> > + RUN (uint64_t, uint32_t, SZ)
> > +
> > +int
> > +main ()
> > +{
> > + RUN_ALL (15)
> > + RUN_ALL (16)
> > + RUN_ALL (17)
> > + RUN_ALL (127)
> > + RUN_ALL (128)
> > + RUN_ALL (129)
> > + RUN_ALL (512)
> > +}
> > diff --git
> > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c
> > new file mode 100644
> > index 00000000000..7638851e4fa
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.
> > +++ c
> > @@ -0,0 +1,46 @@
> > +/* { dg-do run { target { riscv_vector } } } */
> > +/* { dg-additional-options
> > +"--param=riscv-autovec-preference=scalable" } */
> > +
> > +#include <assert.h>
> > +#include "narrow-3.c"
> > +
> > +#define RUN(TYPE1, TYPE2, SZ) \
> > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
> > + } \
> > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
> > + a##TYPE1##_##TYPE2##_##SZ, 9, SZ); \
> > + for (int i = 0; i < SZ; i++) \
> > + { \
> > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
> > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 9)); \
> > + }
> > +
> > +#define RUN_ALL(SZ) \
> > + RUN (int16_t, int8_t, SZ) \
> > + RUN (int16_t, uint8_t, SZ) \
> > + RUN (uint16_t, int8_t, SZ) \
> > + RUN (uint16_t, uint8_t, SZ) \
> > + RUN (int32_t, int16_t, SZ) \
> > + RUN (int32_t, uint16_t, SZ) \
> > + RUN (uint32_t, int16_t, SZ) \
> > + RUN (uint32_t, uint16_t, SZ) \
> > + RUN (int64_t, int32_t, SZ) \
> > + RUN (int64_t, uint32_t, SZ) \
> > + RUN (uint64_t, int32_t, SZ) \
> > + RUN (uint64_t, uint32_t, SZ)
> > +
> > +int
> > +main ()
> > +{
> > + RUN_ALL (15)
> > + RUN_ALL (16)
> > + RUN_ALL (17)
> > + RUN_ALL (127)
> > + RUN_ALL (128)
> > + RUN_ALL (129)
> > + RUN_ALL (512)
> > +}
> > --
> > 2.36.3
> >
> >
>
@@ -330,3 +330,49 @@
}
[(set_attr "type" "viwmuladd")
(set_attr "mode" "<V_DOUBLE_TRUNC>")])
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary narrow shifts.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vnsrl.wv/vnsrl.wx/vnsrl.wi
+;; - vnsra.wv/vnsra.wx/vnsra.wi
+;; -------------------------------------------------------------------------
+
+(define_insn_and_split "*v<any_shiftrt:optab><any_extend:optab>trunc<mode>"
+ [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr,vr")
+ (truncate:<V_DOUBLE_TRUNC>
+ (any_shiftrt:VWEXTI
+ (match_operand:VWEXTI 1 "register_operand" " vr,vr")
+ (any_extend:VWEXTI
+ (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" " vr,vk")))))]
+ "TARGET_VECTOR"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ insn_code icode = code_for_pred_narrow (<any_shiftrt:CODE>, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
+ DONE;
+}
+ [(set_attr "type" "vnshift")
+ (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+
+(define_insn_and_split "*<any_shiftrt:optab>trunc<mode>"
+ [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
+ (truncate:<V_DOUBLE_TRUNC>
+ (any_shiftrt:VWEXTI
+ (match_operand:VWEXTI 1 "register_operand" " vr")
+ (match_operand:<VEL> 2 "csr_operand" " rK"))))]
+ "TARGET_VECTOR"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ insn_code icode = code_for_pred_narrow_scalar (<any_shiftrt:CODE>, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
+ DONE;
+}
+ [(set_attr "type" "vnshift")
+ (set_attr "mode" "<V_DOUBLE_TRUNC>")])
@@ -150,18 +150,23 @@
;; - vsll.vi/vsra.vi/vsrl.vi
;; -------------------------------------------------------------------------
-(define_expand "<optab><mode>3"
- [(set (match_operand:VI 0 "register_operand")
+(define_insn_and_split "<optab><mode>3" ;; Vector shift by a scalar <VEL> amount; formerly a define_expand.
+ [(set (match_operand:VI 0 "register_operand" "=vr")
(any_shift:VI
- (match_operand:VI 1 "register_operand")
- (match_operand:<VEL> 2 "csr_operand")))]
+ (match_operand:VI 1 "register_operand" " vr")
+ (match_operand:<VEL> 2 "csr_operand" " rK")))]
"TARGET_VECTOR"
+ "#" ;; No assembly template of its own; the insn has to be split.
+ "&& can_create_pseudo_p ()" ;; Split condition: TARGET_VECTOR and new pseudos may still be created.
+ [(const_int 0)]
{
operands[2] = gen_lowpart (Pmode, operands[2]);
riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
riscv_vector::RVV_BINOP, operands);
DONE;
-})
+}
+ [(set_attr "type" "vshift")
+ (set_attr "mode" "<MODE>")])
;; -------------------------------------------------------------------------
;; ---- [INT] Binary shifts by scalar.
@@ -170,17 +175,22 @@
;; - vsll.vv/vsra.vv/vsrl.vv
;; -------------------------------------------------------------------------
-(define_expand "v<optab><mode>3"
- [(set (match_operand:VI 0 "register_operand")
+(define_insn_and_split "v<optab><mode>3" ;; Vector shift by a vector (VI) amount; formerly a define_expand.
+ [(set (match_operand:VI 0 "register_operand" "=vr,vr")
(any_shift:VI
- (match_operand:VI 1 "register_operand")
- (match_operand:VI 2 "vector_shift_operand")))]
+ (match_operand:VI 1 "register_operand" " vr,vr")
+ (match_operand:VI 2 "vector_shift_operand" " vr,vk")))]
"TARGET_VECTOR"
+ "#" ;; No assembly template of its own; the insn has to be split.
+ "&& can_create_pseudo_p ()" ;; Split condition: TARGET_VECTOR and new pseudos may still be created.
+ [(const_int 0)]
{
riscv_vector::emit_vlmax_insn (code_for_pred (<CODE>, <MODE>mode),
riscv_vector::RVV_BINOP, operands);
DONE;
-})
+}
+ [(set_attr "type" "vshift")
+ (set_attr "mode" "<MODE>")])
;; -------------------------------------------------------------------------
;; ---- [BOOL] Binary logical operations
@@ -395,16 +405,21 @@
;; -------------------------------------------------------------------------
;; - vncvt.x.x.w
;; -------------------------------------------------------------------------
-(define_expand "trunc<mode><v_double_trunc>2"
- [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
+(define_insn_and_split "trunc<mode><v_double_trunc>2" ;; Truncate a VWEXTI vector to <V_DOUBLE_TRUNC>; formerly a define_expand.
+ [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
(truncate:<V_DOUBLE_TRUNC>
- (match_operand:VWEXTI 1 "register_operand")))]
+ (match_operand:VWEXTI 1 "register_operand" " vr")))]
"TARGET_VECTOR"
+ "#" ;; No assembly template of its own; the insn has to be split.
+ "&& can_create_pseudo_p ()" ;; Split condition: TARGET_VECTOR and new pseudos may still be created.
+ [(const_int 0)]
{
insn_code icode = code_for_pred_trunc (<MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
DONE;
-})
+}
+ [(set_attr "type" "vnshift") ;; Narrowing operation: same type as the narrow-shift patterns added by this patch.
+ (set_attr "mode" "<V_DOUBLE_TRUNC>")]) ;; The result is produced in the narrow mode.
;; -------------------------------------------------------------------------
;; Truncation to a mode whose inner mode size is a quarter of mode's.
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* Narrowing right shift by a per-element amount (a[i] >> b[i] stored to the narrower TYPE2); the scans below expect the .wv forms.  */
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2) \
+ __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
+ TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 *__restrict b, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = a[i] >> b[i]; \
+ }
+
+#define TEST_ALL() \
+ TEST_TYPE (int16_t, int8_t) \
+ TEST_TYPE (int16_t, uint8_t) \
+ TEST_TYPE (uint16_t, int8_t) \
+ TEST_TYPE (uint16_t, uint8_t) \
+ TEST_TYPE (int32_t, int16_t) \
+ TEST_TYPE (int32_t, uint16_t) \
+ TEST_TYPE (uint32_t, int16_t) \
+ TEST_TYPE (uint32_t, uint16_t) \
+ TEST_TYPE (int64_t, int32_t) \
+ TEST_TYPE (int64_t, uint32_t) \
+ TEST_TYPE (uint64_t, int32_t) \
+ TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+/* Six instantiations have a signed TYPE1 and six an unsigned one, hence six arithmetic and six logical narrowing shifts.  */
+/* { dg-final { scan-assembler-times {\tvnsra\.wv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvnsrl\.wv} 6 } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* Narrowing right shift by the compile-time constant 7 (a[i] >> 7 stored to the narrower TYPE2); the scans below expect the .wi forms.  */
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2) \
+ __attribute__ (( \
+ noipa)) void vnshift_##TYPE1##_##TYPE2 (TYPE2 *__restrict dst, \
+ TYPE1 *__restrict a, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = a[i] >> 7; \
+ }
+
+#define TEST_ALL() \
+ TEST_TYPE (int16_t, int8_t) \
+ TEST_TYPE (int16_t, uint8_t) \
+ TEST_TYPE (uint16_t, int8_t) \
+ TEST_TYPE (uint16_t, uint8_t) \
+ TEST_TYPE (int32_t, int16_t) \
+ TEST_TYPE (int32_t, uint16_t) \
+ TEST_TYPE (uint32_t, int16_t) \
+ TEST_TYPE (uint32_t, uint16_t) \
+ TEST_TYPE (int64_t, int32_t) \
+ TEST_TYPE (int64_t, uint32_t) \
+ TEST_TYPE (uint64_t, int32_t) \
+ TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+/* Six instantiations have a signed TYPE1 and six an unsigned one, matching the six/six counts below.  */
+/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
+/* Narrowing right shift by a loop-invariant scalar B (a[i] >> b stored to the narrower TYPE2); the scans below expect the .wx forms.  */
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2) \
+ __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \
+ TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 b, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = a[i] >> b; \
+ }
+
+#define TEST_ALL() \
+ TEST_TYPE (int16_t, int8_t) \
+ TEST_TYPE (int16_t, uint8_t) \
+ TEST_TYPE (uint16_t, int8_t) \
+ TEST_TYPE (uint16_t, uint8_t) \
+ TEST_TYPE (int32_t, int16_t) \
+ TEST_TYPE (int32_t, uint16_t) \
+ TEST_TYPE (uint32_t, int16_t) \
+ TEST_TYPE (uint32_t, uint16_t) \
+ TEST_TYPE (int64_t, int32_t) \
+ TEST_TYPE (int64_t, uint32_t) \
+ TEST_TYPE (uint64_t, int32_t) \
+ TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+/* NOTE(review): only four of each are expected although six TYPE1s are signed and six unsigned; presumably some pairs do not take the .wx form here - confirm.  */
+/* { dg-final { scan-assembler-times {\tvnsra\.wx} 4 } } */
+/* { dg-final { scan-assembler-times {\tvnsrl\.wx} 4 } } */
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* Execution test for the vnshift_* kernels defined in narrow-1.c (per-element shift amounts).  */
+#include <assert.h>
+#include "narrow-1.c"
+/* RUN fills a[] with i % 8723 and b[] with i % (sizeof (TYPE2) * 3), calls the kernel, then asserts every dst[i] against the scalar expression.  */
+#define RUN(TYPE1, TYPE2, SZ) \
+ TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ TYPE2 b##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
+ b##TYPE1##_##TYPE2##_##SZ[i] = i % (sizeof (TYPE2) * 3); \
+ } \
+ vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
+ a##TYPE1##_##TYPE2##_##SZ, \
+ b##TYPE1##_##TYPE2##_##SZ, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
+ == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] \
+ >> b##TYPE1##_##TYPE2##_##SZ[i])); \
+ }
+
+#define RUN_ALL(SZ) \
+ RUN (int16_t, int8_t, SZ) \
+ RUN (int16_t, uint8_t, SZ) \
+ RUN (uint16_t, int8_t, SZ) \
+ RUN (uint16_t, uint8_t, SZ) \
+ RUN (int32_t, int16_t, SZ) \
+ RUN (int32_t, uint16_t, SZ) \
+ RUN (uint32_t, int16_t, SZ) \
+ RUN (uint32_t, uint16_t, SZ) \
+ RUN (int64_t, int32_t, SZ) \
+ RUN (int64_t, uint32_t, SZ) \
+ RUN (uint64_t, int32_t, SZ) \
+ RUN (uint64_t, uint32_t, SZ)
+
+int
+main ()
+{
+ RUN_ALL (15) /* Lengths sit just below, at and just above 16 and 128 - presumably to exercise loop tails; confirm.  */
+ RUN_ALL (16)
+ RUN_ALL (17)
+ RUN_ALL (127)
+ RUN_ALL (128)
+ RUN_ALL (129)
+ RUN_ALL (512)
+}
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* Execution test for the vnshift_* kernels defined in narrow-2.c (shift by the constant 7).  */
+#include <assert.h>
+#include "narrow-2.c"
+/* RUN fills a[] with i % 8723, calls the kernel, then asserts every dst[i] against (TYPE2) (a[i] >> 7).  */
+#define RUN(TYPE1, TYPE2, SZ) \
+ TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
+ } \
+ vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
+ a##TYPE1##_##TYPE2##_##SZ, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
+ == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 7)); \
+ }
+
+#define RUN_ALL(SZ) \
+ RUN (int16_t, int8_t, SZ) \
+ RUN (int16_t, uint8_t, SZ) \
+ RUN (uint16_t, int8_t, SZ) \
+ RUN (uint16_t, uint8_t, SZ) \
+ RUN (int32_t, int16_t, SZ) \
+ RUN (int32_t, uint16_t, SZ) \
+ RUN (uint32_t, int16_t, SZ) \
+ RUN (uint32_t, uint16_t, SZ) \
+ RUN (int64_t, int32_t, SZ) \
+ RUN (int64_t, uint32_t, SZ) \
+ RUN (uint64_t, int32_t, SZ) \
+ RUN (uint64_t, uint32_t, SZ)
+
+int
+main ()
+{
+ RUN_ALL (15) /* Lengths sit just below, at and just above 16 and 128 - presumably to exercise loop tails; confirm.  */
+ RUN_ALL (16)
+ RUN_ALL (17)
+ RUN_ALL (127)
+ RUN_ALL (128)
+ RUN_ALL (129)
+ RUN_ALL (512)
+}
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* Execution test for the vnshift_* kernels defined in narrow-3.c (scalar shift amount, always passed as 9 here).  */
+#include <assert.h>
+#include "narrow-3.c"
+/* RUN fills a[] with i % 8723, calls the kernel with b = 9, then asserts every dst[i] against (TYPE2) (a[i] >> 9).  */
+#define RUN(TYPE1, TYPE2, SZ) \
+ TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \
+ } \
+ vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \
+ a##TYPE1##_##TYPE2##_##SZ, 9, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ assert (dst##TYPE1##_##TYPE2##_##SZ[i] \
+ == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 9)); \
+ }
+
+#define RUN_ALL(SZ) \
+ RUN (int16_t, int8_t, SZ) \
+ RUN (int16_t, uint8_t, SZ) \
+ RUN (uint16_t, int8_t, SZ) \
+ RUN (uint16_t, uint8_t, SZ) \
+ RUN (int32_t, int16_t, SZ) \
+ RUN (int32_t, uint16_t, SZ) \
+ RUN (uint32_t, int16_t, SZ) \
+ RUN (uint32_t, uint16_t, SZ) \
+ RUN (int64_t, int32_t, SZ) \
+ RUN (int64_t, uint32_t, SZ) \
+ RUN (uint64_t, int32_t, SZ) \
+ RUN (uint64_t, uint32_t, SZ)
+
+int
+main ()
+{
+ RUN_ALL (15) /* Lengths sit just below, at and just above 16 and 128 - presumably to exercise loop tails; confirm.  */
+ RUN_ALL (16)
+ RUN_ALL (17)
+ RUN_ALL (127)
+ RUN_ALL (128)
+ RUN_ALL (129)
+ RUN_ALL (512)
+}