[v2] RISC-V: Optimize vsetvl AVL for VLS VLMAX auto-vectorization
Checks
Commit Message
From: Pan Li <pan2.li@intel.com>
This patch optimizes the AVL for VLS auto-vectorization.
Given the sample code below:
typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
__attribute__ ((noipa)) void
f_vnx2qi (int8_t a, int8_t b, int8_t *out)
{
vnx2qi v = {a, b};
*(vnx2qi *) out = v;
}
Before this patch:
f_vnx2qi:
vsetvli a5,zero,e8,mf8,ta,ma
vmv.v.x v1,a0
vslide1down.vx v1,v1,a1
vse8.v v1,0(a2)
ret
After this patch:
f_vnx2qi:
vsetivli zero,2,e8,mf8,ta,ma
vmv.v.x v1,a0
vslide1down.vx v1,v1,a1
vse8.v v1,0(a2)
ret
Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Co-authored-by: kito-cheng <kito.cheng@sifive.com>
gcc/ChangeLog:
* config/riscv/riscv-v.cc (const_vlmax_p): New function for
deciding whether the VLMAX of the mode is a constant usable by vsetivli.
(set_len_and_policy): Optimize VLS-VLMAX code gen to vsetivli.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/vf_avl-1.c: New test.
---
gcc/config/riscv/riscv-v.cc | 26 ++++++++++++++++---
.../gcc.target/riscv/rvv/base/vf_avl-1.c | 15 +++++++++++
2 files changed, 38 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
Comments
LGTM, thanks :)
On Mon, May 15, 2023 at 4:33 PM Pan Li via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch optimizes the AVL for VLS auto-vectorization.
>
> Given below sample code:
>
> typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
>
> __attribute__ ((noipa)) void
> f_vnx2qi (int8_t a, int8_t b, int8_t *out)
> {
> vnx2qi v = {a, b};
> *(vnx2qi *) out = v;
> }
>
> Before this patch:
> f_vnx2qi:
> vsetvli a5,zero,e8,mf8,ta,ma
> vmv.v.x v1,a0
> vslide1down.vx v1,v1,a1
> vse8.v v1,0(a2)
> ret
>
> After this patch:
> f_vnx2qi:
> vsetivli zero,2,e8,mf8,ta,ma
> vmv.v.x v1,a0
> vslide1down.vx v1,v1,a1
> vse8.v v1,0(a2)
> ret
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> Co-authored-by: Juzhe-Zhong <juzhe.zhong@rivai.ai>
> Co-authored-by: kito-cheng <kito.cheng@sifive.com>
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-v.cc (const_vlmax_p): New function for
> deciding the mode is constant or not.
> (set_len_and_policy): Optimize VLS-VLMAX code gen to vsetivli.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/vf_avl-1.c: New test.
> ---
> gcc/config/riscv/riscv-v.cc | 26 ++++++++++++++++---
> .../gcc.target/riscv/rvv/base/vf_avl-1.c | 15 +++++++++++
> 2 files changed, 38 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
>
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index b8dc333f54e..d65e7300303 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -50,6 +50,19 @@ using namespace riscv_vector;
>
> namespace riscv_vector {
>
> +/* Return true if vlmax is constant value and can be used in vsetivl. */
> +static bool
> +const_vlmax_p (machine_mode mode)
> +{
> + poly_uint64 nuints = GET_MODE_NUNITS (mode);
> +
> + return nuints.is_constant ()
> + /* The vsetivli can only hold register 0~31. */
> + ? (IN_RANGE (nuints.to_constant (), 0, 31))
> + /* Only allowed in VLS-VLMAX mode. */
> + : false;
> +}
> +
> template <int MAX_OPERANDS> class insn_expander
> {
> public:
> @@ -101,12 +114,19 @@ public:
>
> void set_len_and_policy (rtx len, bool force_vlmax = false)
> {
> - bool vlmax_p = force_vlmax;
> + bool vlmax_p = force_vlmax || !len;
> gcc_assert (has_dest);
>
> - if (!len)
> + if (vlmax_p && const_vlmax_p (dest_mode))
> + {
> + /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of the
> + vsetvli to obtain the value of vlmax. */
> + poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
> + len = gen_int_mode (nunits, Pmode);
> + vlmax_p = false; /* It has became NONVLMAX now. */
> + }
> + else if (!len)
> {
> - vlmax_p = true;
> len = gen_reg_rtx (Pmode);
> emit_vlmax_vsetvl (dest_mode, len);
> }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
> new file mode 100644
> index 00000000000..11adf6bc611
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx2qi (int8_t a, int8_t b, int8_t *out)
> +{
> + vnx2qi v = {a, b};
> + *(vnx2qi *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*2,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 } } */
> --
> 2.34.1
>
Committed. Thank you, Kito.
Pan
-----Original Message-----
From: Kito Cheng <kito.cheng@gmail.com>
Sent: Monday, May 15, 2023 4:35 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>
Subject: Re: [PATCH v2] RISC-V: Optimize vsetvl AVL for VLS VLMAX auto-vectorization
LGTM, thanks :)
On Mon, May 15, 2023 at 4:33 PM Pan Li via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch optimizes the AVL for VLS auto-vectorization.
>
> Given below sample code:
>
> typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
>
> __attribute__ ((noipa)) void
> f_vnx2qi (int8_t a, int8_t b, int8_t *out)
> {
> vnx2qi v = {a, b};
> *(vnx2qi *) out = v;
> }
>
> Before this patch:
> f_vnx2qi:
> vsetvli a5,zero,e8,mf8,ta,ma
> vmv.v.x v1,a0
> vslide1down.vx v1,v1,a1
> vse8.v v1,0(a2)
> ret
>
> After this patch:
> f_vnx2qi:
> vsetivli zero,2,e8,mf8,ta,ma
> vmv.v.x v1,a0
> vslide1down.vx v1,v1,a1
> vse8.v v1,0(a2)
> ret
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> Co-authored-by: Juzhe-Zhong <juzhe.zhong@rivai.ai>
> Co-authored-by: kito-cheng <kito.cheng@sifive.com>
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-v.cc (const_vlmax_p): New function for
> deciding the mode is constant or not.
> (set_len_and_policy): Optimize VLS-VLMAX code gen to vsetivli.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/vf_avl-1.c: New test.
> ---
> gcc/config/riscv/riscv-v.cc | 26 ++++++++++++++++---
> .../gcc.target/riscv/rvv/base/vf_avl-1.c | 15 +++++++++++
> 2 files changed, 38 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
>
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index b8dc333f54e..d65e7300303 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -50,6 +50,19 @@ using namespace riscv_vector;
>
> namespace riscv_vector {
>
> +/* Return true if vlmax is constant value and can be used in vsetivl. */
> +static bool
> +const_vlmax_p (machine_mode mode)
> +{
> + poly_uint64 nuints = GET_MODE_NUNITS (mode);
> +
> + return nuints.is_constant ()
> + /* The vsetivli can only hold register 0~31. */
> + ? (IN_RANGE (nuints.to_constant (), 0, 31))
> + /* Only allowed in VLS-VLMAX mode. */
> + : false;
> +}
> +
> template <int MAX_OPERANDS> class insn_expander
> {
> public:
> @@ -101,12 +114,19 @@ public:
>
> void set_len_and_policy (rtx len, bool force_vlmax = false)
> {
> - bool vlmax_p = force_vlmax;
> + bool vlmax_p = force_vlmax || !len;
> gcc_assert (has_dest);
>
> - if (!len)
> + if (vlmax_p && const_vlmax_p (dest_mode))
> + {
> + /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of the
> + vsetvli to obtain the value of vlmax. */
> + poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
> + len = gen_int_mode (nunits, Pmode);
> + vlmax_p = false; /* It has became NONVLMAX now. */
> + }
> + else if (!len)
> {
> - vlmax_p = true;
> len = gen_reg_rtx (Pmode);
> emit_vlmax_vsetvl (dest_mode, len);
> }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
> new file mode 100644
> index 00000000000..11adf6bc611
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx2qi (int8_t a, int8_t b, int8_t *out)
> +{
> + vnx2qi v = {a, b};
> + *(vnx2qi *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*2,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 } } */
> --
> 2.34.1
>
@@ -50,6 +50,19 @@ using namespace riscv_vector;
namespace riscv_vector {
+/* Return true if vlmax is a constant value and can be used in vsetivli. */
+static bool
+const_vlmax_p (machine_mode mode)
+{
+ poly_uint64 nuints = GET_MODE_NUNITS (mode);
+
+ return nuints.is_constant ()
+ /* The vsetivli AVL is a 5-bit unsigned immediate, i.e. 0~31. */
+ ? (IN_RANGE (nuints.to_constant (), 0, 31))
+ /* Only allowed in VLS-VLMAX mode. */
+ : false;
+}
+
template <int MAX_OPERANDS> class insn_expander
{
public:
@@ -101,12 +114,19 @@ public:
void set_len_and_policy (rtx len, bool force_vlmax = false)
{
- bool vlmax_p = force_vlmax;
+ bool vlmax_p = force_vlmax || !len;
gcc_assert (has_dest);
- if (!len)
+ if (vlmax_p && const_vlmax_p (dest_mode))
+ {
+ /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of the
+ vsetvli to obtain the value of vlmax. */
+ poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
+ len = gen_int_mode (nunits, Pmode);
+ vlmax_p = false; /* It has become NONVLMAX now. */
+ }
+ else if (!len)
{
- vlmax_p = true;
len = gen_reg_rtx (Pmode);
emit_vlmax_vsetvl (dest_mode, len);
}
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
+
+__attribute__ ((noipa)) void
+f_vnx2qi (int8_t a, int8_t b, int8_t *out)
+{
+ vnx2qi v = {a, b};
+ *(vnx2qi *) out = v;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*2,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 } } */