LoongArch: Provide fmin/fmax RTL pattern for vectors
Checks
Commit Message
We already had smin/smax RTL pattern using vfmin/vfmax instructions.
But for smin/smax, it's unspecified what will happen if either operand
contains any NaN elements. So we would not vectorize the loop with
-fno-finite-math-only (the default for all optimization levels except
-Ofast).
But the LoongArch vfmin/vfmax instructions are IEEE-754-2008 conformant, so
we can also use them and vectorize the loop.
gcc/ChangeLog:
* config/loongarch/simd.md (fmax<mode>3): New define_insn.
(fmin<mode>3): Likewise.
(reduc_fmax_scal_<mode>): New define_expand.
(reduc_fmin_scal_<mode>): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vfmax-vfmin.c: New test.
---
Happy new year folks. This is a follow-up of [1].
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
[1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/641583.html
gcc/config/loongarch/simd.md | 31 +++++++++++++++++++
.../gcc.target/loongarch/vfmax-vfmin.c | 31 +++++++++++++++++++
2 files changed, 62 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
Comments
LGTM!
Thanks!
在 2024/1/1 上午3:15, Xi Ruoyao 写道:
> We already had smin/smax RTL pattern using vfmin/vfmax instructions.
> But for smin/smax, it's unspecified what will happen if either operand
> contains any NaN operands. So we would not vectorize the loop with
> -fno-finite-math-only (the default for all optimization levels expect
> -Ofast).
>
> But, LoongArch vfmin/vfmax instruction is IEEE-754-2008 conformant so we
> can also use them and vectorize the loop.
>
> gcc/ChangeLog:
>
> * config/loongarch/simd.md (fmax<mode>3): New define_insn.
> (fmin<mode>3): Likewise.
> (reduc_fmax_scal_<mode>3): New define_expand.
> (reduc_fmin_scal_<mode>3): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/loongarch/vfmax-vfmin.c: New test.
> ---
>
> Happy new year folks. This is a follow-up of [1].
>
> Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
>
> [1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/641583.html
>
> gcc/config/loongarch/simd.md | 31 +++++++++++++++++++
> .../gcc.target/loongarch/vfmax-vfmin.c | 31 +++++++++++++++++++
> 2 files changed, 62 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
>
> diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
> index 93fb39abcf5..8ac1d75a85c 100644
> --- a/gcc/config/loongarch/simd.md
> +++ b/gcc/config/loongarch/simd.md
> @@ -426,6 +426,37 @@ (define_insn "<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>"
> [(set_attr "type" "simd_fcmp")
> (set_attr "mode" "<MODE>")])
>
> +; [x]vf{min/max} instructions are IEEE-754-2008 conforming, use them for
> +; the corresponding IEEE-754-2008 operations. We must use UNSPEC instead
> +; of smin/smax though, see PR105414 and PR107013.
> +
> +(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
> +(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
> +
> +(define_insn "<fmaxmin><mode>3"
> + [(set (match_operand:FVEC 0 "register_operand" "=f")
> + (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")
> + (match_operand:FVEC 2 "register_operand" "f")]
> + UNSPEC_FMAXMIN))]
> + ""
> + "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
> + [(set_attr "type" "simd_fminmax")
> + (set_attr "mode" "<MODE>")])
> +
> +;; ... and also reduc operations.
> +(define_expand "reduc_<fmaxmin>_scal_<mode>"
> + [(match_operand:<UNITMODE> 0 "register_operand")
> + (match_operand:FVEC 1 "register_operand")
> + (const_int UNSPEC_FMAXMIN)]
> + ""
> +{
> + rtx tmp = gen_reg_rtx (<MODE>mode);
> + loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]);
> + emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
> + const0_rtx));
> + DONE;
> +})
> +
> ; The LoongArch SX Instructions.
> (include "lsx.md")
>
> diff --git a/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
> new file mode 100644
> index 00000000000..811fee361c3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune=la464 -mlasx" } */
> +/* { dg-final { scan-assembler "\tvfmin\\.d" } } */
> +/* { dg-final { scan-assembler "\tvfmax\\.d" } } */
> +/* { dg-final { scan-assembler "\txvfmin\\.d" } } */
> +/* { dg-final { scan-assembler "\txvfmax\\.d" } } */
> +/* { dg-final { scan-assembler "\tvfmin\\.s" } } */
> +/* { dg-final { scan-assembler "\tvfmax\\.s" } } */
> +/* { dg-final { scan-assembler "\txvfmin\\.s" } } */
> +/* { dg-final { scan-assembler "\txvfmax\\.s" } } */
> +
> +#define T(OP) __typeof__ (__builtin_##OP (0, 0))
> +
> +#define TEST(OP, LEN) \
> +void \
> +test_##OP##LEN (T (OP) *restrict dest, \
> + const T (OP) *restrict src1, \
> + const T (OP) *restrict src2) \
> +{ \
> + for (int i = 0; i < LEN / sizeof (T(OP)); i++) \
> + dest[i] = __builtin_##OP (src1[i], src2[i]); \
> +}
> +
> +TEST(fmin, 16)
> +TEST(fmax, 16)
> +TEST(fmin, 32)
> +TEST(fmax, 32)
> +TEST(fminf, 16)
> +TEST(fmaxf, 16)
> +TEST(fminf, 32)
> +TEST(fmaxf, 32)
On Wed, 2024-01-03 at 16:24 +0800, chenglulu wrote:
> LGTM!
>
> Thanks!
Pushed r14-6890.
FWIW sometimes the tree optimizer still fails to emit .reduc_f{max,min} or
emits them sub-optimally. I've commented in PR112457 but maybe I
should've created a new ticket...
> 在 2024/1/1 上午3:15, Xi Ruoyao 写道:
> > We already had smin/smax RTL pattern using vfmin/vfmax instructions.
> > But for smin/smax, it's unspecified what will happen if either operand
> > contains any NaN operands. So we would not vectorize the loop with
> > -fno-finite-math-only (the default for all optimization levels expect
> > -Ofast).
> >
> > But, LoongArch vfmin/vfmax instruction is IEEE-754-2008 conformant so we
> > can also use them and vectorize the loop.
> >
> > gcc/ChangeLog:
> >
> > * config/loongarch/simd.md (fmax<mode>3): New define_insn.
> > (fmin<mode>3): Likewise.
> > (reduc_fmax_scal_<mode>3): New define_expand.
> > (reduc_fmin_scal_<mode>3): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/loongarch/vfmax-vfmin.c: New test.
> > ---
> >
> > Happy new year folks. This is a follow-up of [1].
> >
> > Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
> >
> > [1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/641583.html
> >
> > gcc/config/loongarch/simd.md | 31 +++++++++++++++++++
> > .../gcc.target/loongarch/vfmax-vfmin.c | 31 +++++++++++++++++++
> > 2 files changed, 62 insertions(+)
> > create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
> >
> > diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
> > index 93fb39abcf5..8ac1d75a85c 100644
> > --- a/gcc/config/loongarch/simd.md
> > +++ b/gcc/config/loongarch/simd.md
> > @@ -426,6 +426,37 @@ (define_insn "<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>"
> > [(set_attr "type" "simd_fcmp")
> > (set_attr "mode" "<MODE>")])
> >
> > +; [x]vf{min/max} instructions are IEEE-754-2008 conforming, use them for
> > +; the corresponding IEEE-754-2008 operations. We must use UNSPEC instead
> > +; of smin/smax though, see PR105414 and PR107013.
> > +
> > +(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
> > +(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
> > +
> > +(define_insn "<fmaxmin><mode>3"
> > + [(set (match_operand:FVEC 0 "register_operand" "=f")
> > + (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")
> > + (match_operand:FVEC 2 "register_operand" "f")]
> > + UNSPEC_FMAXMIN))]
> > + ""
> > + "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
> > + [(set_attr "type" "simd_fminmax")
> > + (set_attr "mode" "<MODE>")])
> > +
> > +;; ... and also reduc operations.
> > +(define_expand "reduc_<fmaxmin>_scal_<mode>"
> > + [(match_operand:<UNITMODE> 0 "register_operand")
> > + (match_operand:FVEC 1 "register_operand")
> > + (const_int UNSPEC_FMAXMIN)]
> > + ""
> > +{
> > + rtx tmp = gen_reg_rtx (<MODE>mode);
> > + loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]);
> > + emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
> > + const0_rtx));
> > + DONE;
> > +})
> > +
> > ; The LoongArch SX Instructions.
> > (include "lsx.md")
> >
> > diff --git a/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
> > new file mode 100644
> > index 00000000000..811fee361c3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mtune=la464 -mlasx" } */
> > +/* { dg-final { scan-assembler "\tvfmin\\.d" } } */
> > +/* { dg-final { scan-assembler "\tvfmax\\.d" } } */
> > +/* { dg-final { scan-assembler "\txvfmin\\.d" } } */
> > +/* { dg-final { scan-assembler "\txvfmax\\.d" } } */
> > +/* { dg-final { scan-assembler "\tvfmin\\.s" } } */
> > +/* { dg-final { scan-assembler "\tvfmax\\.s" } } */
> > +/* { dg-final { scan-assembler "\txvfmin\\.s" } } */
> > +/* { dg-final { scan-assembler "\txvfmax\\.s" } } */
> > +
> > +#define T(OP) __typeof__ (__builtin_##OP (0, 0))
> > +
> > +#define TEST(OP, LEN) \
> > +void \
> > +test_##OP##LEN (T (OP) *restrict dest, \
> > + const T (OP) *restrict src1, \
> > + const T (OP) *restrict src2) \
> > +{ \
> > + for (int i = 0; i < LEN / sizeof (T(OP)); i++) \
> > + dest[i] = __builtin_##OP (src1[i], src2[i]); \
> > +}
> > +
> > +TEST(fmin, 16)
> > +TEST(fmax, 16)
> > +TEST(fmin, 32)
> > +TEST(fmax, 32)
> > +TEST(fminf, 16)
> > +TEST(fmaxf, 16)
> > +TEST(fminf, 32)
> > +TEST(fmaxf, 32)
>
@@ -426,6 +426,37 @@ (define_insn "<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>"
[(set_attr "type" "simd_fcmp")
(set_attr "mode" "<MODE>")])
+; [x]vf{min/max} instructions are IEEE-754-2008 conforming, use them for
+; the corresponding IEEE-754-2008 operations. We must use UNSPEC instead
+; of smin/smax though, see PR105414 and PR107013.
+
+(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
+
+(define_insn "<fmaxmin><mode>3"
+ [(set (match_operand:FVEC 0 "register_operand" "=f")
+ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")
+ (match_operand:FVEC 2 "register_operand" "f")]
+ UNSPEC_FMAXMIN))]
+ ""
+ "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+;; ... and also reduc operations.
+(define_expand "reduc_<fmaxmin>_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:FVEC 1 "register_operand")
+ (const_int UNSPEC_FMAXMIN)]
+ ""
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
; The LoongArch SX Instructions.
(include "lsx.md")
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=la464 -mlasx" } */
+/* { dg-final { scan-assembler "\tvfmin\\.d" } } */
+/* { dg-final { scan-assembler "\tvfmax\\.d" } } */
+/* { dg-final { scan-assembler "\txvfmin\\.d" } } */
+/* { dg-final { scan-assembler "\txvfmax\\.d" } } */
+/* { dg-final { scan-assembler "\tvfmin\\.s" } } */
+/* { dg-final { scan-assembler "\tvfmax\\.s" } } */
+/* { dg-final { scan-assembler "\txvfmin\\.s" } } */
+/* { dg-final { scan-assembler "\txvfmax\\.s" } } */
+
+#define T(OP) __typeof__ (__builtin_##OP (0, 0))
+
+#define TEST(OP, LEN) \
+void \
+test_##OP##LEN (T (OP) *restrict dest, \
+ const T (OP) *restrict src1, \
+ const T (OP) *restrict src2) \
+{ \
+ for (int i = 0; i < LEN / sizeof (T(OP)); i++) \
+ dest[i] = __builtin_##OP (src1[i], src2[i]); \
+}
+
+TEST(fmin, 16)
+TEST(fmax, 16)
+TEST(fmin, 32)
+TEST(fmax, 32)
+TEST(fminf, 16)
+TEST(fmaxf, 16)
+TEST(fminf, 32)
+TEST(fmaxf, 32)