RISC-V: Support cond vfsgnj.vv autovec pattern
Checks
Commit Message
This patch adds combine patterns to combine vfsgnj.vv + vcond_mask
into a masked vfsgnj.vv. vfsgnjx.vv cannot currently be produced in the
midend; we will send another patch to address this issue.
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*copysign<mode>_neg): Move.
(*cond_copysign<mode>): New combine pattern.
* config/riscv/riscv-v.cc (needs_fp_rounding): Extend.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test.
---
gcc/config/riscv/autovec-opt.md | 68 +++++++++----
gcc/config/riscv/riscv-v.cc | 4 +-
.../rvv/autovec/cond/cond_copysign-run.c | 99 +++++++++++++++++++
.../rvv/autovec/cond/cond_copysign-rv32gcv.c | 12 +++
.../rvv/autovec/cond/cond_copysign-rv64gcv.c | 12 +++
.../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++
.../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++
7 files changed, 349 insertions(+), 20 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
Comments
LGTM
On Wed, Sep 13, 2023 at 12:25 AM Lehua Ding <lehua.ding@rivai.ai> wrote:
>
> This patch add combine patterns to combine vfsgnj.vv + vcond_mask
> to mask vfsgnj.vv. For vfsgnjx.vv, it can not be produced in midend
> currently. We will send another patch to take this issue.
>
> gcc/ChangeLog:
>
> * config/riscv/autovec-opt.md (*copysign<mode>_neg): Move.
> (*cond_copysign<mode>): New combine pattern.
> * config/riscv/riscv-v.cc (needs_fp_rounding): Extend.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test.
> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test.
> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test.
> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test.
> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test.
>
> ---
> gcc/config/riscv/autovec-opt.md | 68 +++++++++----
> gcc/config/riscv/riscv-v.cc | 4 +-
> .../rvv/autovec/cond/cond_copysign-run.c | 99 +++++++++++++++++++
> .../rvv/autovec/cond/cond_copysign-rv32gcv.c | 12 +++
> .../rvv/autovec/cond/cond_copysign-rv64gcv.c | 12 +++
> .../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++
> .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++
> 7 files changed, 349 insertions(+), 20 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>
> diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
> index 58e80044f1e..f759525f96b 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -609,6 +609,10 @@
> (set_attr "mode" "<V_DOUBLE_TRUNC>")
> (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))])
>
> +;; =============================================================================
> +;; Combine op + vmerge to cond_op
> +;; =============================================================================
> +
> ;; Combine <op> and vcond_mask generated by midend into cond_len_<op>
> ;; Currently supported operations:
> ;; abs(FP)
> @@ -651,25 +655,6 @@
> DONE;
> })
>
> -;; Combine vlmax neg and UNSPEC_VCOPYSIGN
> -(define_insn_and_split "*copysign<mode>_neg"
> - [(set (match_operand:VF 0 "register_operand")
> - (neg:VF
> - (unspec:VF [
> - (match_operand:VF 1 "register_operand")
> - (match_operand:VF 2 "register_operand")
> - ] UNSPEC_VCOPYSIGN)))]
> - "TARGET_VECTOR && can_create_pseudo_p ()"
> - "#"
> - "&& 1"
> - [(const_int 0)]
> -{
> - riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
> - riscv_vector::BINARY_OP, operands);
> - DONE;
> -}
> -[(set_attr "type" "vector")])
> -
> ;; Combine sign_extend/zero_extend(vf2) and vcond_mask
> (define_insn_and_split "*cond_<optab><v_double_trunc><mode>"
> [(set (match_operand:VWEXTI 0 "register_operand")
> @@ -918,6 +903,27 @@
> }
> [(set_attr "type" "vector")])
>
> +;; Combine vfsgnj.vv + vcond_mask
> +(define_insn_and_split "*cond_copysign<mode>"
> + [(set (match_operand:VF 0 "register_operand")
> + (if_then_else:VF
> + (match_operand:<VM> 1 "register_operand")
> + (unspec:VF
> + [(match_operand:VF 2 "register_operand")
> + (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN)
> + (match_operand:VF 4 "register_operand")))]
> + "TARGET_VECTOR && can_create_pseudo_p ()"
> + "#"
> + "&& 1"
> + [(const_int 0)]
> +{
> + insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode);
> + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
> + gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
> + riscv_vector::expand_cond_len_binop (icode, ops);
> + DONE;
> +})
> +
> ;; =============================================================================
> ;; Combine extend + binop to widen_binop
> ;; =============================================================================
> @@ -1119,3 +1125,27 @@
> DONE;
> }
> [(set_attr "type" "vfwmul")])
> +
> +
> +;; =============================================================================
> +;; Misc combine patterns
> +;; =============================================================================
> +
> +;; Combine vlmax neg and UNSPEC_VCOPYSIGN
> +(define_insn_and_split "*copysign<mode>_neg"
> + [(set (match_operand:VF 0 "register_operand")
> + (neg:VF
> + (unspec:VF [
> + (match_operand:VF 1 "register_operand")
> + (match_operand:VF 2 "register_operand")
> + ] UNSPEC_VCOPYSIGN)))]
> + "TARGET_VECTOR && can_create_pseudo_p ()"
> + "#"
> + "&& 1"
> + [(const_int 0)]
> +{
> + riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
> + riscv_vector::BINARY_OP, operands);
> + DONE;
> +}
> +[(set_attr "type" "vector")])
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 4d95bd773a2..76e6094f45b 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode)
> && icode != maybe_code_for_pred_extend (mode)
> /* narrower-INT -> FP */
> && icode != maybe_code_for_pred_widen (FLOAT, mode)
> - && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode);
> + && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode)
> + /* vfsgnj */
> + && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode);
> }
>
> /* Subroutine to expand COND_LEN_* patterns. */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
> new file mode 100644
> index 00000000000..be37854c135
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
> @@ -0,0 +1,99 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +#include <assert.h>
> +
> +#define SZ 512
> +
> +#define EPS 1e-6
> +
> +#define INIT_PRED() \
> + int pred[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + pred[i] = i % 3; \
> + }
> +
> +#define RUN(TYPE, VAL) \
> + TYPE a##TYPE[SZ]; \
> + TYPE b##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a##TYPE[i] = i; \
> + b##TYPE[i] = (i & 1) ? VAL : -VAL; \
> + } \
> + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN2(TYPE, VAL) \
> + TYPE a2##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + a2##TYPE[i] = i; \
> + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
> +
> +#define RUN3(TYPE, VAL) \
> + TYPE a3##TYPE[SZ]; \
> + TYPE b3##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a3##TYPE[i] = (i & 1) ? -i : i; \
> + b3##TYPE[i] = (i & 1) ? VAL : -VAL; \
> + } \
> + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
> +
> +#define RUN4(TYPE, VAL) \
> + TYPE a4##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + a4##TYPE[i] = -i; \
> + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
> +
> +#define RUN5(TYPE, VAL) \
> + TYPE a5##TYPE[SZ]; \
> + TYPE b5##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a5##TYPE[i] = i; \
> + b5##TYPE[i] = (i & 1) ? VAL : -VAL; \
> + } \
> + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] \
> + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN6(TYPE, VAL) \
> + TYPE a6##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + a6##TYPE[i] = i; \
> + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
> +
> +#define RUN_ALL() \
> + RUN (float, 5) \
> + RUN (double, 6) \
> + RUN2 (float, 11) \
> + RUN2 (double, 12) \
> + RUN3 (float, 16) \
> + RUN3 (double, 18) \
> + RUN4 (float, 17) \
> + RUN4 (double, 19) \
> + RUN5 (float, 123) \
> + RUN5 (double, 523) \
> + RUN6 (float, 777) \
> + RUN6 (double, 877)
> +
> +int
> +main ()
> +{
> + INIT_PRED ()
> + RUN_ALL ()
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
> new file mode 100644
> index 00000000000..cef531b9700
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
> + expand cannot handle currently.
> + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */
> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
> new file mode 100644
> index 00000000000..cc2aa4de757
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
> + expand cannot handle currently.
> + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */
> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
> new file mode 100644
> index 00000000000..4191500fd83
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
> @@ -0,0 +1,81 @@
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE, SUFFIX) \
> + __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst, \
> + TYPE *restrict a, \
> + TYPE *restrict b, \
> + int *restrict pred, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; \
> + }
> +
> +#define TEST_TYPE2(TYPE, SUFFIX) \
> + __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst, \
> + TYPE *restrict a, TYPE b, \
> + int *restrict pred, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i]; \
> + }
> +
> +#define TEST_TYPE3(TYPE, SUFFIX) \
> + __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst, \
> + TYPE *restrict a, \
> + TYPE *restrict b, \
> + int *restrict pred, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] \
> + = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i]; \
> + }
> +
> +#define TEST_TYPE4(TYPE, SUFFIX) \
> + __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst, \
> + TYPE *restrict a, TYPE b, \
> + int *restrict pred, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : dst[i]; \
> + }
> +
> +#define TEST_TYPE5(TYPE, SUFFIX) \
> + __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst, \
> + TYPE *restrict a, \
> + TYPE *restrict b, \
> + int *restrict pred, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; \
> + }
> +
> +#define TEST_TYPE6(TYPE, SUFFIX) \
> + __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst, \
> + TYPE *restrict a, TYPE b, \
> + int *restrict pred, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b) : dst[i]; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (_Float16, f16) \
> + TEST_TYPE (float, f) \
> + TEST_TYPE (double, ) \
> + TEST_TYPE2 (_Float16, f16) \
> + TEST_TYPE2 (float, f) \
> + TEST_TYPE2 (double, ) \
> + TEST_TYPE3 (_Float16, f16) \
> + TEST_TYPE3 (float, f) \
> + TEST_TYPE3 (double, ) \
> + TEST_TYPE4 (_Float16, f16) \
> + TEST_TYPE4 (float, f) \
> + TEST_TYPE4 (double, ) \
> + TEST_TYPE5 (_Float16, f16) \
> + TEST_TYPE5 (float, f) \
> + TEST_TYPE5 (double, ) \
> + TEST_TYPE6 (_Float16, f16) \
> + TEST_TYPE6 (float, f) \
> + TEST_TYPE6 (double, )
> +
> +TEST_ALL ()
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
> new file mode 100644
> index 00000000000..6e337f9e74c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
> @@ -0,0 +1,93 @@
> +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +#include <assert.h>
> +
> +#define SZ 512
> +
> +#define EPS 1e-6
> +
> +#define INIT_PRED() \
> + int pred[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + pred[i] = i % 3; \
> + }
> +
> +#define RUN(TYPE, VAL) \
> + TYPE a##TYPE[SZ]; \
> + TYPE b##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a##TYPE[i] = i; \
> + b##TYPE[i] = (i & 1) ? VAL : -VAL; \
> + } \
> + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN2(TYPE, VAL) \
> + TYPE a2##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + a2##TYPE[i] = i; \
> + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
> +
> +#define RUN3(TYPE, VAL) \
> + TYPE a3##TYPE[SZ]; \
> + TYPE b3##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a3##TYPE[i] = (i & 1) ? -i : i; \
> + b3##TYPE[i] = (i & 1) ? VAL : -VAL; \
> + } \
> + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
> +
> +#define RUN4(TYPE, VAL) \
> + TYPE a4##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + a4##TYPE[i] = -i; \
> + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
> +
> +#define RUN5(TYPE, VAL) \
> + TYPE a5##TYPE[SZ]; \
> + TYPE b5##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + { \
> + a5##TYPE[i] = i; \
> + b5##TYPE[i] = (i & 1) ? VAL : -VAL; \
> + } \
> + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] \
> + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN6(TYPE, VAL) \
> + TYPE a6##TYPE[SZ]; \
> + for (int i = 0; i < SZ; i++) \
> + a6##TYPE[i] = i; \
> + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); \
> + for (int i = 0; i < SZ; i++) \
> + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
> +
> +#define RUN_ALL() \
> + RUN (_Float16, 5) \
> + RUN2 (_Float16, 11) \
> + RUN3 (_Float16, 16) \
> + RUN4 (_Float16, 17) \
> + RUN5 (_Float16, 123) \
> + RUN6 (_Float16, 777)
> +
> +int
> +main ()
> +{
> + INIT_PRED ()
> + RUN_ALL ()
> +}
> --
> 2.36.3
>
Committed, thanks Kito.
On 2023/9/13 16:49, Kito Cheng wrote:
> LGTM
>
> On Wed, Sep 13, 2023 at 12:25 AM Lehua Ding <lehua.ding@rivai.ai> wrote:
>>
>> This patch add combine patterns to combine vfsgnj.vv + vcond_mask
>> to mask vfsgnj.vv. For vfsgnjx.vv, it can not be produced in midend
>> currently. We will send another patch to take this issue.
>>
>> gcc/ChangeLog:
>>
>> * config/riscv/autovec-opt.md (*copysign<mode>_neg): Move.
>> (*cond_copysign<mode>): New combine pattern.
>> * config/riscv/riscv-v.cc (needs_fp_rounding): Extend.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test.
>> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test.
>> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test.
>> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test.
>> * gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test.
>>
>> ---
>> gcc/config/riscv/autovec-opt.md | 68 +++++++++----
>> gcc/config/riscv/riscv-v.cc | 4 +-
>> .../rvv/autovec/cond/cond_copysign-run.c | 99 +++++++++++++++++++
>> .../rvv/autovec/cond/cond_copysign-rv32gcv.c | 12 +++
>> .../rvv/autovec/cond/cond_copysign-rv64gcv.c | 12 +++
>> .../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++
>> .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++
>> 7 files changed, 349 insertions(+), 20 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
>> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
>> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
>> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
>> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>>
>> diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
>> index 58e80044f1e..f759525f96b 100644
>> --- a/gcc/config/riscv/autovec-opt.md
>> +++ b/gcc/config/riscv/autovec-opt.md
>> @@ -609,6 +609,10 @@
>> (set_attr "mode" "<V_DOUBLE_TRUNC>")
>> (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))])
>>
>> +;; =============================================================================
>> +;; Combine op + vmerge to cond_op
>> +;; =============================================================================
>> +
>> ;; Combine <op> and vcond_mask generated by midend into cond_len_<op>
>> ;; Currently supported operations:
>> ;; abs(FP)
>> @@ -651,25 +655,6 @@
>> DONE;
>> })
>>
>> -;; Combine vlmax neg and UNSPEC_VCOPYSIGN
>> -(define_insn_and_split "*copysign<mode>_neg"
>> - [(set (match_operand:VF 0 "register_operand")
>> - (neg:VF
>> - (unspec:VF [
>> - (match_operand:VF 1 "register_operand")
>> - (match_operand:VF 2 "register_operand")
>> - ] UNSPEC_VCOPYSIGN)))]
>> - "TARGET_VECTOR && can_create_pseudo_p ()"
>> - "#"
>> - "&& 1"
>> - [(const_int 0)]
>> -{
>> - riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
>> - riscv_vector::BINARY_OP, operands);
>> - DONE;
>> -}
>> -[(set_attr "type" "vector")])
>> -
>> ;; Combine sign_extend/zero_extend(vf2) and vcond_mask
>> (define_insn_and_split "*cond_<optab><v_double_trunc><mode>"
>> [(set (match_operand:VWEXTI 0 "register_operand")
>> @@ -918,6 +903,27 @@
>> }
>> [(set_attr "type" "vector")])
>>
>> +;; Combine vfsgnj.vv + vcond_mask
>> +(define_insn_and_split "*cond_copysign<mode>"
>> + [(set (match_operand:VF 0 "register_operand")
>> + (if_then_else:VF
>> + (match_operand:<VM> 1 "register_operand")
>> + (unspec:VF
>> + [(match_operand:VF 2 "register_operand")
>> + (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN)
>> + (match_operand:VF 4 "register_operand")))]
>> + "TARGET_VECTOR && can_create_pseudo_p ()"
>> + "#"
>> + "&& 1"
>> + [(const_int 0)]
>> +{
>> + insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode);
>> + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
>> + gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
>> + riscv_vector::expand_cond_len_binop (icode, ops);
>> + DONE;
>> +})
>> +
>> ;; =============================================================================
>> ;; Combine extend + binop to widen_binop
>> ;; =============================================================================
>> @@ -1119,3 +1125,27 @@
>> DONE;
>> }
>> [(set_attr "type" "vfwmul")])
>> +
>> +
>> +;; =============================================================================
>> +;; Misc combine patterns
>> +;; =============================================================================
>> +
>> +;; Combine vlmax neg and UNSPEC_VCOPYSIGN
>> +(define_insn_and_split "*copysign<mode>_neg"
>> + [(set (match_operand:VF 0 "register_operand")
>> + (neg:VF
>> + (unspec:VF [
>> + (match_operand:VF 1 "register_operand")
>> + (match_operand:VF 2 "register_operand")
>> + ] UNSPEC_VCOPYSIGN)))]
>> + "TARGET_VECTOR && can_create_pseudo_p ()"
>> + "#"
>> + "&& 1"
>> + [(const_int 0)]
>> +{
>> + riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
>> + riscv_vector::BINARY_OP, operands);
>> + DONE;
>> +}
>> +[(set_attr "type" "vector")])
>> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
>> index 4d95bd773a2..76e6094f45b 100644
>> --- a/gcc/config/riscv/riscv-v.cc
>> +++ b/gcc/config/riscv/riscv-v.cc
>> @@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode)
>> && icode != maybe_code_for_pred_extend (mode)
>> /* narrower-INT -> FP */
>> && icode != maybe_code_for_pred_widen (FLOAT, mode)
>> - && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode);
>> + && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode)
>> + /* vfsgnj */
>> + && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode);
>> }
>>
>> /* Subroutine to expand COND_LEN_* patterns. */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
>> new file mode 100644
>> index 00000000000..be37854c135
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
>> @@ -0,0 +1,99 @@
>> +/* { dg-do run { target { riscv_vector } } } */
>> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
>> +
>> +#include "cond_copysign-template.h"
>> +
>> +#include <assert.h>
>> +
>> +#define SZ 512
>> +
>> +#define EPS 1e-6
>> +
>> +#define INIT_PRED() \
>> + int pred[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + { \
>> + pred[i] = i % 3; \
>> + }
>> +
>> +#define RUN(TYPE, VAL) \
>> + TYPE a##TYPE[SZ]; \
>> + TYPE b##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + { \
>> + a##TYPE[i] = i; \
>> + b##TYPE[i] = (i & 1) ? VAL : -VAL; \
>> + } \
>> + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
>> +
>> +#define RUN2(TYPE, VAL) \
>> + TYPE a2##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + a2##TYPE[i] = i; \
>> + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
>> +
>> +#define RUN3(TYPE, VAL) \
>> + TYPE a3##TYPE[SZ]; \
>> + TYPE b3##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + { \
>> + a3##TYPE[i] = (i & 1) ? -i : i; \
>> + b3##TYPE[i] = (i & 1) ? VAL : -VAL; \
>> + } \
>> + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
>> +
>> +#define RUN4(TYPE, VAL) \
>> + TYPE a4##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + a4##TYPE[i] = -i; \
>> + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
>> +
>> +#define RUN5(TYPE, VAL) \
>> + TYPE a5##TYPE[SZ]; \
>> + TYPE b5##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + { \
>> + a5##TYPE[i] = i; \
>> + b5##TYPE[i] = (i & 1) ? VAL : -VAL; \
>> + } \
>> + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] \
>> + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
>> +
>> +#define RUN6(TYPE, VAL) \
>> + TYPE a6##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + a6##TYPE[i] = i; \
>> + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
>> +
>> +#define RUN_ALL() \
>> + RUN (float, 5) \
>> + RUN (double, 6) \
>> + RUN2 (float, 11) \
>> + RUN2 (double, 12) \
>> + RUN3 (float, 16) \
>> + RUN3 (double, 18) \
>> + RUN4 (float, 17) \
>> + RUN4 (double, 19) \
>> + RUN5 (float, 123) \
>> + RUN5 (double, 523) \
>> + RUN6 (float, 777) \
>> + RUN6 (double, 877)
>> +
>> +int
>> +main ()
>> +{
>> + INIT_PRED ()
>> + RUN_ALL ()
>> +}
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
>> new file mode 100644
>> index 00000000000..cef531b9700
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
>> @@ -0,0 +1,12 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
>> +
>> +#include "cond_copysign-template.h"
>> +
>> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
>> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
>> + expand cannot handle currently.
>> + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */
>> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
>> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
>> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
>> new file mode 100644
>> index 00000000000..cc2aa4de757
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
>> @@ -0,0 +1,12 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
>> +
>> +#include "cond_copysign-template.h"
>> +
>> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
>> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
>> + expand cannot handle currently.
>> + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */
>> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
>> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
>> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
>> new file mode 100644
>> index 00000000000..4191500fd83
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
>> @@ -0,0 +1,81 @@
>> +#include <stdint-gcc.h>
>> +
>> +#define TEST_TYPE(TYPE, SUFFIX) \
>> + __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst, \
>> + TYPE *restrict a, \
>> + TYPE *restrict b, \
>> + int *restrict pred, int n) \
>> + { \
>> + for (int i = 0; i < n; i++) \
>> + dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; \
>> + }
>> +
>> +#define TEST_TYPE2(TYPE, SUFFIX) \
>> + __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst, \
>> + TYPE *restrict a, TYPE b, \
>> + int *restrict pred, int n) \
>> + { \
>> + for (int i = 0; i < n; i++) \
>> + dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i]; \
>> + }
>> +
>> +#define TEST_TYPE3(TYPE, SUFFIX) \
>> + __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst, \
>> + TYPE *restrict a, \
>> + TYPE *restrict b, \
>> + int *restrict pred, int n) \
>> + { \
>> + for (int i = 0; i < n; i++) \
>> + dst[i] \
>> + = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i]; \
>> + }
>> +
>> +#define TEST_TYPE4(TYPE, SUFFIX) \
>> + __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst, \
>> + TYPE *restrict a, TYPE b, \
>> + int *restrict pred, int n) \
>> + { \
>> + for (int i = 0; i < n; i++) \
>> + dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : dst[i]; \
>> + }
>> +
>> +#define TEST_TYPE5(TYPE, SUFFIX) \
>> + __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst, \
>> + TYPE *restrict a, \
>> + TYPE *restrict b, \
>> + int *restrict pred, int n) \
>> + { \
>> + for (int i = 0; i < n; i++) \
>> + dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; \
>> + }
>> +
>> +#define TEST_TYPE6(TYPE, SUFFIX) \
>> + __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst, \
>> + TYPE *restrict a, TYPE b, \
>> + int *restrict pred, int n) \
>> + { \
>> + for (int i = 0; i < n; i++) \
>> + dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b) : dst[i]; \
>> + }
>> +
>> +#define TEST_ALL() \
>> + TEST_TYPE (_Float16, f16) \
>> + TEST_TYPE (float, f) \
>> + TEST_TYPE (double, ) \
>> + TEST_TYPE2 (_Float16, f16) \
>> + TEST_TYPE2 (float, f) \
>> + TEST_TYPE2 (double, ) \
>> + TEST_TYPE3 (_Float16, f16) \
>> + TEST_TYPE3 (float, f) \
>> + TEST_TYPE3 (double, ) \
>> + TEST_TYPE4 (_Float16, f16) \
>> + TEST_TYPE4 (float, f) \
>> + TEST_TYPE4 (double, ) \
>> + TEST_TYPE5 (_Float16, f16) \
>> + TEST_TYPE5 (float, f) \
>> + TEST_TYPE5 (double, ) \
>> + TEST_TYPE6 (_Float16, f16) \
>> + TEST_TYPE6 (float, f) \
>> + TEST_TYPE6 (double, )
>> +
>> +TEST_ALL ()
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>> new file mode 100644
>> index 00000000000..6e337f9e74c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>> @@ -0,0 +1,93 @@
>> +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
>> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
>> +
>> +#include "cond_copysign-template.h"
>> +
>> +#include <assert.h>
>> +
>> +#define SZ 512
>> +
>> +#define EPS 1e-6
>> +
>> +#define INIT_PRED() \
>> + int pred[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + { \
>> + pred[i] = i % 3; \
>> + }
>> +
>> +#define RUN(TYPE, VAL) \
>> + TYPE a##TYPE[SZ]; \
>> + TYPE b##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + { \
>> + a##TYPE[i] = i; \
>> + b##TYPE[i] = (i & 1) ? VAL : -VAL; \
>> + } \
>> + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
>> +
>> +#define RUN2(TYPE, VAL) \
>> + TYPE a2##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + a2##TYPE[i] = i; \
>> + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
>> +
>> +#define RUN3(TYPE, VAL) \
>> + TYPE a3##TYPE[SZ]; \
>> + TYPE b3##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + { \
>> + a3##TYPE[i] = (i & 1) ? -i : i; \
>> + b3##TYPE[i] = (i & 1) ? VAL : -VAL; \
>> + } \
>> + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
>> +
>> +#define RUN4(TYPE, VAL) \
>> + TYPE a4##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + a4##TYPE[i] = -i; \
>> + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
>> +
>> +#define RUN5(TYPE, VAL) \
>> + TYPE a5##TYPE[SZ]; \
>> + TYPE b5##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + { \
>> + a5##TYPE[i] = i; \
>> + b5##TYPE[i] = (i & 1) ? VAL : -VAL; \
>> + } \
>> + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] \
>> + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
>> +
>> +#define RUN6(TYPE, VAL) \
>> + TYPE a6##TYPE[SZ]; \
>> + for (int i = 0; i < SZ; i++) \
>> + a6##TYPE[i] = i; \
>> + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); \
>> + for (int i = 0; i < SZ; i++) \
>> + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
>> +
>> +#define RUN_ALL() \
>> + RUN (_Float16, 5) \
>> + RUN2 (_Float16, 11) \
>> + RUN3 (_Float16, 16) \
>> + RUN4 (_Float16, 17) \
>> + RUN5 (_Float16, 123) \
>> + RUN6 (_Float16, 777)
>> +
>> +int
>> +main ()
>> +{
>> + INIT_PRED ()
>> + RUN_ALL ()
>> +}
>> --
>> 2.36.3
>>
>
@@ -609,6 +609,10 @@
(set_attr "mode" "<V_DOUBLE_TRUNC>")
(set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))])
+;; =============================================================================
+;; Combine op + vmerge to cond_op
+;; =============================================================================
+
;; Combine <op> and vcond_mask generated by midend into cond_len_<op>
;; Currently supported operations:
;; abs(FP)
@@ -651,25 +655,6 @@
DONE;
})
-;; Combine vlmax neg and UNSPEC_VCOPYSIGN
-(define_insn_and_split "*copysign<mode>_neg"
- [(set (match_operand:VF 0 "register_operand")
- (neg:VF
- (unspec:VF [
- (match_operand:VF 1 "register_operand")
- (match_operand:VF 2 "register_operand")
- ] UNSPEC_VCOPYSIGN)))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
- [(const_int 0)]
-{
- riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
- riscv_vector::BINARY_OP, operands);
- DONE;
-}
-[(set_attr "type" "vector")])
-
;; Combine sign_extend/zero_extend(vf2) and vcond_mask
(define_insn_and_split "*cond_<optab><v_double_trunc><mode>"
[(set (match_operand:VWEXTI 0 "register_operand")
@@ -918,6 +903,27 @@
}
[(set_attr "type" "vector")])
+;; Combine vfsgnj.vv + vcond_mask into one predicated UNSPEC_VCOPYSIGN insn
+(define_insn_and_split "*cond_copysign<mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (if_then_else:VF
+ (match_operand:<VM> 1 "register_operand")
+ (unspec:VF
+ [(match_operand:VF 2 "register_operand")
+ (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN)
+ (match_operand:VF 4 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
+ gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; /* operands 0-4, then the mode's unit count (presumably the length operand) */
+ riscv_vector::expand_cond_len_binop (icode, ops);
+ DONE;
+} [(set_attr "type" "vector")])
+
;; =============================================================================
;; Combine extend + binop to widen_binop
;; =============================================================================
@@ -1119,3 +1125,27 @@
DONE;
}
[(set_attr "type" "vfwmul")])
+
+
+;; =============================================================================
+;; Misc combine patterns
+;; =============================================================================
+
+;; Combine vlmax neg and UNSPEC_VCOPYSIGN into a single pred_ncopysign insn
+(define_insn_and_split "*copysign<mode>_neg"
+ [(set (match_operand:VF 0 "register_operand")
+ (neg:VF
+ (unspec:VF [
+ (match_operand:VF 1 "register_operand")
+ (match_operand:VF 2 "register_operand")
+ ] UNSPEC_VCOPYSIGN)))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+}
+[(set_attr "type" "vector")])
@@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode)
&& icode != maybe_code_for_pred_extend (mode)
/* narrower-INT -> FP */
&& icode != maybe_code_for_pred_widen (FLOAT, mode)
- && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode);
+ && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode)
+ /* vfsgnj */
+ && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode);
}
/* Subroutine to expand COND_LEN_* patterns. */
new file mode 100644
@@ -0,0 +1,99 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "cond_copysign-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+
+#define EPS 1e-6
+
+#define INIT_PRED() /* Declare pred[SZ] and fill it with 0, 1, 2, 0, 1, 2, ... */ \
+ int pred[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ pred[i] = i % 3; /* zero exactly when i is a multiple of 3 */ \
+ }
+
+#define RUN(TYPE, VAL) /* Check copysign_TYPE: vector magnitude, vector sign source. */ \
+ TYPE a##TYPE[SZ]; \
+ TYPE b##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE[i] = i; \
+ b##TYPE[i] = (i & 1) ? VAL : -VAL; /* +VAL on odd i, -VAL on even i */ \
+ } \
+ copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); /* NOTE(review): dst and a are the same array, yet the template declares both restrict */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked: i on odd i, -i on even i */ \
+ assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
+
+#define RUN2(TYPE, VAL) /* Check copysigns_TYPE: sign taken from the scalar -VAL. */ \
+ TYPE a2##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ a2##TYPE[i] = i; \
+ copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); /* a2 is both destination and source */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked; they must equal -i */ \
+ assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
+
+#define RUN3(TYPE, VAL) /* Check xorsign_TYPE: a[i] multiplied by copysign (1.0, b[i]). */ \
+ TYPE a3##TYPE[SZ]; \
+ TYPE b3##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a3##TYPE[i] = (i & 1) ? -i : i; /* -i on odd i, i on even i */ \
+ b3##TYPE[i] = (i & 1) ? VAL : -VAL; /* +VAL on odd i, -VAL on even i */ \
+ } \
+ xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); /* a3 is both destination and source */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked; they must equal -i */ \
+ assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
+
+#define RUN4(TYPE, VAL) /* Check xorsigns_TYPE: a[i] multiplied by copysign (1.0, -VAL). */ \
+ TYPE a4##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ a4##TYPE[i] = -i; \
+ xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); /* a4 is both destination and source */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked; they must equal i */ \
+ assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
+
+#define RUN5(TYPE, VAL) /* Check ncopysign_TYPE: negated copysign, vector sign source. */ \
+ TYPE a5##TYPE[SZ]; \
+ TYPE b5##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a5##TYPE[i] = i; \
+ b5##TYPE[i] = (i & 1) ? VAL : -VAL; /* +VAL on odd i, -VAL on even i */ \
+ } \
+ ncopysign_##TYPE (a5##TYPE, a5##TYPE, b5##TYPE, pred, SZ); /* b5, not RUN's b: keeps RUN5 self-contained */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked: -i on odd i, i on even i */ \
+ assert (!pred[i] \
+ || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
+
+#define RUN6(TYPE, VAL) /* Check ncopysigns_TYPE: negated copysign with the scalar sign source -VAL. */ \
+ TYPE a6##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ a6##TYPE[i] = i; \
+ ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); /* a6 is both destination and source */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked; they must equal i */ \
+ assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
+
+#define RUN_ALL() /* Expand all six checks for float and for double. */ \
+ RUN (float, 5) \
+ RUN (double, 6) \
+ RUN2 (float, 11) \
+ RUN2 (double, 12) \
+ RUN3 (float, 16) \
+ RUN3 (double, 18) \
+ RUN4 (float, 17) \
+ RUN4 (double, 19) \
+ RUN5 (float, 123) \
+ RUN5 (double, 523) \
+ RUN6 (float, 777) \
+ RUN6 (double, 877)
+
+int
+main () /* Test driver: build the predicate array, then run every check in this scope. */
+{
+ INIT_PRED ()
+ RUN_ALL ()
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "cond_copysign-template.h"
+
+/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
+/* 1. The vectorizer wraps scalar variants of copysign into vector constants, which
+ expand cannot handle currently.
+ 2. match.pd currently converts .COPYSIGN (1, b) + COND_MUL to AND + XOR. */
+/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "cond_copysign-template.h"
+
+/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
+/* 1. The vectorizer wraps scalar variants of copysign into vector constants, which
+ expand cannot handle currently.
+ 2. match.pd currently converts .COPYSIGN (1, b) + COND_MUL to AND + XOR. */
+/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
new file mode 100644
@@ -0,0 +1,81 @@
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE, SUFFIX) /* Define copysign_TYPE: dst[i] = copysign (a[i], b[i]) where pred[i] is nonzero. */ \
+ __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst, \
+ TYPE *restrict a, \
+ TYPE *restrict b, \
+ int *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; /* SUFFIX picks the builtin variant; otherwise dst[i] keeps its value */ \
+ }
+
+#define TEST_TYPE2(TYPE, SUFFIX) /* Define copysigns_TYPE: dst[i] = copysign (a[i], b) with scalar b where pred[i] is nonzero. */ \
+ __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst, \
+ TYPE *restrict a, TYPE b, \
+ int *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i]; /* otherwise dst[i] keeps its value */ \
+ }
+
+#define TEST_TYPE3(TYPE, SUFFIX) /* Define xorsign_TYPE: dst[i] = a[i] * copysign (1.0, b[i]) where pred[i] is nonzero. */ \
+ __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst, \
+ TYPE *restrict a, \
+ TYPE *restrict b, \
+ int *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] \
+ = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i]; /* otherwise dst[i] keeps its value */ \
+ }
+
+#define TEST_TYPE4(TYPE, SUFFIX) /* Define xorsigns_TYPE: dst[i] = a[i] * copysign (1.0, b) with scalar b where pred[i] is nonzero. */ \
+ __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst, \
+ TYPE *restrict a, TYPE b, \
+ int *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : dst[i]; /* otherwise dst[i] keeps its value */ \
+ }
+
+#define TEST_TYPE5(TYPE, SUFFIX) /* Define ncopysign_TYPE: dst[i] = -copysign (a[i], b[i]) where pred[i] is nonzero. */ \
+ __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst, \
+ TYPE *restrict a, \
+ TYPE *restrict b, \
+ int *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; /* otherwise dst[i] keeps its value */ \
+ }
+
+#define TEST_TYPE6(TYPE, SUFFIX) /* Define ncopysigns_TYPE: dst[i] = -copysign (a[i], b) with scalar b where pred[i] is nonzero. */ \
+ __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst, \
+ TYPE *restrict a, TYPE b, \
+ int *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b) : dst[i]; /* otherwise dst[i] keeps its value */ \
+ }
+
+#define TEST_ALL() /* Instantiate all six kernels for _Float16, float and double. */ \
+ TEST_TYPE (_Float16, f16) \
+ TEST_TYPE (float, f) \
+ TEST_TYPE (double, ) \
+ TEST_TYPE2 (_Float16, f16) \
+ TEST_TYPE2 (float, f) \
+ TEST_TYPE2 (double, ) \
+ TEST_TYPE3 (_Float16, f16) \
+ TEST_TYPE3 (float, f) \
+ TEST_TYPE3 (double, ) \
+ TEST_TYPE4 (_Float16, f16) \
+ TEST_TYPE4 (float, f) \
+ TEST_TYPE4 (double, ) \
+ TEST_TYPE5 (_Float16, f16) \
+ TEST_TYPE5 (float, f) \
+ TEST_TYPE5 (double, ) \
+ TEST_TYPE6 (_Float16, f16) \
+ TEST_TYPE6 (float, f) \
+ TEST_TYPE6 (double, )
+
+TEST_ALL ()
new file mode 100644
@@ -0,0 +1,93 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "cond_copysign-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+
+#define EPS 1e-6
+
+#define INIT_PRED() /* Declare pred[SZ] and fill it with 0, 1, 2, 0, 1, 2, ... */ \
+ int pred[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ pred[i] = i % 3; /* zero exactly when i is a multiple of 3 */ \
+ }
+
+#define RUN(TYPE, VAL) /* Check copysign_TYPE: vector magnitude, vector sign source. */ \
+ TYPE a##TYPE[SZ]; \
+ TYPE b##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE[i] = i; \
+ b##TYPE[i] = (i & 1) ? VAL : -VAL; /* +VAL on odd i, -VAL on even i */ \
+ } \
+ copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); /* NOTE(review): dst and a are the same array, yet the template declares both restrict */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked: i on odd i, -i on even i */ \
+ assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
+
+#define RUN2(TYPE, VAL) /* Check copysigns_TYPE: sign taken from the scalar -VAL. */ \
+ TYPE a2##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ a2##TYPE[i] = i; \
+ copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); /* a2 is both destination and source */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked; they must equal -i */ \
+ assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
+
+#define RUN3(TYPE, VAL) /* Check xorsign_TYPE: a[i] multiplied by copysign (1.0, b[i]). */ \
+ TYPE a3##TYPE[SZ]; \
+ TYPE b3##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a3##TYPE[i] = (i & 1) ? -i : i; /* -i on odd i, i on even i */ \
+ b3##TYPE[i] = (i & 1) ? VAL : -VAL; /* +VAL on odd i, -VAL on even i */ \
+ } \
+ xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); /* a3 is both destination and source */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked; they must equal -i */ \
+ assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
+
+#define RUN4(TYPE, VAL) /* Check xorsigns_TYPE: a[i] multiplied by copysign (1.0, -VAL). */ \
+ TYPE a4##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ a4##TYPE[i] = -i; \
+ xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); /* a4 is both destination and source */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked; they must equal i */ \
+ assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
+
+#define RUN5(TYPE, VAL) /* Check ncopysign_TYPE: negated copysign, vector sign source. */ \
+ TYPE a5##TYPE[SZ]; \
+ TYPE b5##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a5##TYPE[i] = i; \
+ b5##TYPE[i] = (i & 1) ? VAL : -VAL; /* +VAL on odd i, -VAL on even i */ \
+ } \
+ ncopysign_##TYPE (a5##TYPE, a5##TYPE, b5##TYPE, pred, SZ); /* b5, not RUN's b: keeps RUN5 self-contained */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked: -i on odd i, i on even i */ \
+ assert (!pred[i] \
+ || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
+
+#define RUN6(TYPE, VAL) /* Check ncopysigns_TYPE: negated copysign with the scalar sign source -VAL. */ \
+ TYPE a6##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ a6##TYPE[i] = i; \
+ ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); /* a6 is both destination and source */ \
+ for (int i = 0; i < SZ; i++) /* only elements with nonzero pred are checked; they must equal i */ \
+ assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
+
+#define RUN_ALL() /* Expand all six checks for _Float16. */ \
+ RUN (_Float16, 5) \
+ RUN2 (_Float16, 11) \
+ RUN3 (_Float16, 16) \
+ RUN4 (_Float16, 17) \
+ RUN5 (_Float16, 123) \
+ RUN6 (_Float16, 777)
+
+int
+main () /* Test driver: build the predicate array, then run every check in this scope. */
+{
+ INIT_PRED ()
+ RUN_ALL ()
+}