RISC-V: Add Veyron V1 pipeline description
Checks
Commit Message
gcc/ChangeLog:
* config/riscv/riscv-cores.def: Add veyron-v1
core and tune info.
* config/riscv/riscv-opts.h
(riscv_microarchitecture_type): Add veyron-v1.
* config/riscv/riscv.cc (veyron_v1_tune_info): New.
* config/riscv/riscv.md: Include veyron-v1.md.
(tune): Add veyron-v1.
* config/riscv/veyron-v1.md: New file.
* doc/invoke.texi (mcpu): Add veyron-v1.
(mtune): Add veyron-v1.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/divmod-2.c: Enable test for veyron-v1.
---
gcc/config/riscv/riscv-cores.def | 4 +
gcc/config/riscv/riscv-opts.h | 3 +-
gcc/config/riscv/riscv.cc | 15 +++
gcc/config/riscv/riscv.md | 3 +-
gcc/config/riscv/veyron-v1.md | 121 ++++++++++++++++++++++
gcc/doc/invoke.texi | 5 +-
gcc/testsuite/gcc.target/riscv/divmod-2.c | 5 +-
7 files changed, 149 insertions(+), 7 deletions(-)
create mode 100644 gcc/config/riscv/veyron-v1.md
Comments
I would like vendor cpu name start with vendor name, like ventana-veyron-v1
which is consistent with all other vendor cpu, and llvm are using same
convention too.
Raphael Moreira Zinsly <rzinsly@ventanamicro.com>於 2023年6月7日 週三,21:18寫道:
> gcc/ChangeLog:
>
> * config/riscv/riscv-cores.def: Add veyron-v1
> core and tune info.
> * config/riscv/riscv-opts.h
> (riscv_microarchitecture_type): Add veyron-v1.
> * config/riscv/riscv.cc (veyron_v1_tune_info): New.
> * config/riscv/riscv.md: Include veyron-v1.md.
> (tune): Add veyron-v1.
> * config/riscv/veyron-v1.md: New file.
> * doc/invoke.texi (mcpu): Add veyron-v1.
> (mtune): Add veyron-v1.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/divmod-2.c: Enable test for veyron-v1.
> ---
> gcc/config/riscv/riscv-cores.def | 4 +
> gcc/config/riscv/riscv-opts.h | 3 +-
> gcc/config/riscv/riscv.cc | 15 +++
> gcc/config/riscv/riscv.md | 3 +-
> gcc/config/riscv/veyron-v1.md | 121 ++++++++++++++++++++++
> gcc/doc/invoke.texi | 5 +-
> gcc/testsuite/gcc.target/riscv/divmod-2.c | 5 +-
> 7 files changed, 149 insertions(+), 7 deletions(-)
> create mode 100644 gcc/config/riscv/veyron-v1.md
>
> diff --git a/gcc/config/riscv/riscv-cores.def
> b/gcc/config/riscv/riscv-cores.def
> index 7d87ab7ce28..4078439e562 100644
> --- a/gcc/config/riscv/riscv-cores.def
> +++ b/gcc/config/riscv/riscv-cores.def
> @@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
> RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
> RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
> RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> +RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
> RISCV_TUNE("size", generic, optimize_size_tune_info)
>
> #undef RISCV_TUNE
> @@ -77,4 +78,7 @@ RISCV_CORE("thead-c906",
> "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
> "xtheadcondmov_xtheadfmemidx_xtheadmac_"
> "xtheadmemidx_xtheadmempair_xtheadsync",
> "thead-c906")
> +
> +RISCV_CORE("veyron-v1",
> "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
> + "veyron-v1")
> #undef RISCV_CORE
> diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
> index f34ca993689..8f7dd84115f 100644
> --- a/gcc/config/riscv/riscv-opts.h
> +++ b/gcc/config/riscv/riscv-opts.h
> @@ -52,7 +52,8 @@ extern enum riscv_isa_spec_class riscv_isa_spec;
> /* Keep this list in sync with define_attr "tune" in riscv.md. */
> enum riscv_microarchitecture_type {
> generic,
> - sifive_7
> + sifive_7,
> + veyron_v1
> };
> extern enum riscv_microarchitecture_type riscv_microarchitecture;
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 3954fc07a8b..6a5e89b4813 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -366,6 +366,21 @@ static const struct riscv_tune_param
> thead_c906_tune_info = {
> false /* use_divmod_expansion */
> };
>
> +/* Costs to use when optimizing for Ventana Micro Veyron V1. */
> +static const struct riscv_tune_param veyron_v1_tune_info = {
> + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
> + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
> + {COSTS_N_INSNS (9), COSTS_N_INSNS (17)}, /* fp_div */
> + {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
> + {COSTS_N_INSNS (12), COSTS_N_INSNS (20)}, /* int_div */
> + 4, /* issue_rate */
> + 4, /* branch_cost */
> + 5, /* memory_cost */
> + 8, /* fmv_cost */
> + false, /* slow_unaligned_access */
> + true, /* use_divmod_expansion */
> +};
> +
> /* Costs to use when optimizing for size. */
> static const struct riscv_tune_param optimize_size_tune_info = {
> {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 124d8c95804..90f0c1b1cf1 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -482,7 +482,7 @@
> ;; Microarchitectures we know how to tune for.
> ;; Keep this in sync with enum riscv_microarchitecture.
> (define_attr "tune"
> - "generic,sifive_7"
> + "generic,sifive_7,veyron_v1"
> (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
>
> ;; Describe a user's asm statement.
> @@ -3123,3 +3123,4 @@
> (include "sifive-7.md")
> (include "thead.md")
> (include "vector.md")
> +(include "veyron-v1.md")
> diff --git a/gcc/config/riscv/veyron-v1.md b/gcc/config/riscv/veyron-v1.md
> new file mode 100644
> index 00000000000..3eeff76d9b0
> --- /dev/null
> +++ b/gcc/config/riscv/veyron-v1.md
> @@ -0,0 +1,121 @@
> +;; Scheduling pipeline description for Veyron V1 RISC-V.
> +;; Copyright (C) 2023 Free Software Foundation, Inc.
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify it
> +;; under the terms of the GNU General Public License as published
> +;; by the Free Software Foundation; either version 3, or (at your
> +;; option) any later version.
> +
> +;; GCC is distributed in the hope that it will be useful, but WITHOUT
> +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
> +;; License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3. If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +
> +(define_automaton "veyron_v1")
> +
> +;; 5 conceptual units of the processor:
> +;; The 4 symmetric ALUs
> +;; The execution FPU (fadd, fmul, fmadd, comparisons, etc)
> +;; The data transfer FPU
> +;; The shared multi-cycle ALU for integer mul, div, etc
> +;; The fdiv/fsqrt unit in the FPU
> +
> +(define_cpu_unit "ixu0_v1,ixu1_v1,ixu2_v1,ixu3_v1" "veyron_v1")
> +(define_cpu_unit "veyron_v1_fxu" "veyron_v1")
> +(define_cpu_unit "veyron_v1_fxu_xfer" "veyron_v1")
> +
> +(define_cpu_unit "veyron_v1_multi" "veyron_v1")
> +(define_cpu_unit "veyron_v1_div" "veyron_v1")
> +
> +;; Shortcut for reserving one of the symmetric ALU units.
> +(define_reservation "veyron_v1_ixu"
> + "ixu0_v1|ixu1_v1|ixu2_v1|ixu3_v1")
> +
> +(define_insn_reservation "veyron_v1_alu" 2
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type"
> "unknown,const,arith,shift,slt,multi,auipc,nop,logical,move,bitmanip,min,max,minu,maxu,clz,ctz"))
> + "veyron_v1_ixu")
> +
> +(define_insn_reservation "veyron_v1_store" 1
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "store"))
> + "veyron_v1_ixu")
> +
> +(define_insn_reservation "veyron_v1_ixu" 4
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "load"))
> + "veyron_v1_ixu")
> +
> +(define_insn_reservation "veyron_v1_fpload" 6
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "fpload"))
> + "veyron_v1_ixu")
> +
> +(define_insn_reservation "veyron_v1_xfer" 4
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "mfc,mtc"))
> + "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*3")
> +
> +(define_insn_reservation "veyron_v1_fpstore" 1
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "fpstore"))
> + "veyron_v1_ixu+veyron_v1_fxu_xfer")
> +
> +(define_insn_reservation "veyron_v1_fmove" 2
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "fmove"))
> + "veyron_v1_ixu+veyron_v1_fxu_xfer")
> +
> +(define_insn_reservation "veyron_v1_fcvt" 5
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "fcvt"))
> + "veyron_v1_ixu+veyron_v1_fxu_xfer")
> +
> +(define_insn_reservation "veyron_v1_fpcmp" 2
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "fcmp"))
> + "veyron_v1_ixu+veyron_v1_fxu")
> +
> +(define_insn_reservation "veyron_v1_imul" 4
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "imul"))
> + "veyron_v1_ixu+veyron_v1_multi")
> +
> +(define_insn_reservation "veyron_v1_idiv" 20
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "idiv"))
> + "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*19")
> +
> +(define_insn_reservation "veyron_v1_fpa" 3
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "fadd,fmul"))
> + "veyron_v1_ixu+veyron_v1_fxu")
> +
> +(define_insn_reservation "veyron_v1_fmadd" 5
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "fmadd"))
> + "veyron_v1_ixu+veyron_v1_fxu")
> +
> +(define_insn_reservation "veyron_v1_fpdivsqrt_single" 9
> + (and (eq_attr "tune" "veyron_v1")
> + (and (eq_attr "type" "fdiv,fsqrt")
> + (eq_attr "mode" "SF")))
> + "veyron_v1_ixu+veyron_v1_fxu+veyron_v1_div,veyron_v1_div*8")
> +
> +(define_insn_reservation "veyron_v1_fpdivsqrt_double" 17
> + (and (eq_attr "tune" "veyron_v1")
> + (and (eq_attr "type" "fdiv,fsqrt")
> + (eq_attr "mode" "DF")))
> + "veyron_v1_ixu+veyron_v1_fxu+veyron_v1_div,veyron_v1_div*16")
> +
> +(define_insn_reservation "veyron_v1_popcount" 4
> + (and (eq_attr "tune" "veyron_v1")
> + (eq_attr "type" "cpop"))
> + "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*3")
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 898a88ce33e..b163be7ec5e 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -28995,14 +28995,15 @@ by particular CPU name.
> Permissible values for this option are: @samp{sifive-e20},
> @samp{sifive-e21},
> @samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34},
> @samp{sifive-e76},
> @samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54},
> @samp{sifive-s76},
> -@samp{sifive-u54}, and @samp{sifive-u74}.
> +@samp{sifive-u54}, @samp{sifive-u74} and @samp{veyron-v1}.
>
> @opindex mtune
> @item -mtune=@var{processor-string}
> Optimize the output for the given processor, specified by
> microarchitecture or
> particular CPU name. Permissible values for this option are:
> @samp{rocket},
> @samp{sifive-3-series}, @samp{sifive-5-series}, @samp{sifive-7-series},
> -@samp{thead-c906}, @samp{size}, and all valid options for @option{-mcpu=}.
> +@samp{thead-c906}, @samp{veyron-v1} and @samp{size}, and all valid
> options for
> +@option{-mcpu=}.
>
> When @option{-mtune=} is not specified, use the setting from
> @option{-mcpu},
> the default is @samp{rocket} if both are not specified.
> diff --git a/gcc/testsuite/gcc.target/riscv/divmod-2.c
> b/gcc/testsuite/gcc.target/riscv/divmod-2.c
> index dfd319e52c0..67330f88cf8 100644
> --- a/gcc/testsuite/gcc.target/riscv/divmod-2.c
> +++ b/gcc/testsuite/gcc.target/riscv/divmod-2.c
> @@ -1,7 +1,6 @@
> /* { dg-do compile } */
> -/* Skip this everywhere for now. Once we have a target with
> - divmod enabled, only skip for -O0, -O1, -Og, -Oz, -Os. */
> -/* { dg-skip-if "" { *-*-* } { } } */
> +/* { dg-options "-mtune=veyron-v1 -mabi=lp64" } */
> +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Oz" "-Os" } } */
>
> void
> foo(int a, int b, int *c, int *d)
> --
> 2.40.1
>
>
On 6/7/23 08:13, Kito Cheng wrote:
> I would like vendor cpu name start with vendor name, like
> ventana-veyron-v1 which is consistent with all other vendor cpu, and
> llvm are using same convention too.
Fair enough. Better to get it right now than have this stuff be
inconsistent. It'll be a little more pain for our internal folks, but
we'll deal with that :-)
Jeff
On 6/7/23 08:43, Jeff Law wrote:
>
>
> On 6/7/23 08:13, Kito Cheng wrote:
>> I would like vendor cpu name start with vendor name, like
>> ventana-veyron-v1 which is consistent with all other vendor cpu, and
>> llvm are using same convention too.
> Fair enough. Better to get it right now than have this stuff be
> inconsistent. It'll be a little more pain for our internal folks, but
> we'll deal with that :-)
I should have also noted that this seems to get a pretty consistent 1-2%
improvement across spec2017. Not surprisingly it reduces stalls at the
retirement unit due to instructions not being completed. We can see
impacts elsewhere like fewer stalls due to conflicting resources at the
dispatch stage.
It does make it more likely that we'll blow out the register file on
x264's key SATD routine which shows up as a single digit regression for
input #1. The fix there is pretty simple, use register pressure
scheduling, which we'll have some hard data on relatively soon.
jeff
> diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
> index 7d87ab7ce28..4078439e562 100644
> --- a/gcc/config/riscv/riscv-cores.def
> +++ b/gcc/config/riscv/riscv-cores.def
> @@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
> RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
> RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
> RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> +RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
> RISCV_TUNE("size", generic, optimize_size_tune_info)
>
> #undef RISCV_TUNE
> @@ -77,4 +78,7 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
> "xtheadcondmov_xtheadfmemidx_xtheadmac_"
> "xtheadmemidx_xtheadmempair_xtheadsync",
> "thead-c906")
> +
> +RISCV_CORE("veyron-v1", "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
> + "veyron-v1")
Seems like xventanacondops have not in the trunk yet, I saw Jeff has
approved before but not commit yet
https://patchwork.ozlabs.org/project/gcc/patch/20230210224150.2801962-11-philipp.tomsich@vrull.eu/
On Thu 8. Jun 2023 at 09:35, Kito Cheng via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:
> > diff --git a/gcc/config/riscv/riscv-cores.def
> b/gcc/config/riscv/riscv-cores.def
> > index 7d87ab7ce28..4078439e562 100644
> > --- a/gcc/config/riscv/riscv-cores.def
> > +++ b/gcc/config/riscv/riscv-cores.def
> > @@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic,
> rocket_tune_info)
> > RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
> > RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
> > RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> > +RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
> > RISCV_TUNE("size", generic, optimize_size_tune_info)
> >
> > #undef RISCV_TUNE
> > @@ -77,4 +78,7 @@ RISCV_CORE("thead-c906",
> "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
> > "xtheadcondmov_xtheadfmemidx_xtheadmac_"
> > "xtheadmemidx_xtheadmempair_xtheadsync",
> > "thead-c906")
> > +
> > +RISCV_CORE("veyron-v1",
> "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
> > + "veyron-v1")
>
> Seems like xventanacondops have not in the trunk yet, I saw Jeff has
> approved before but not commit yet
We couldn’t apply back then, as Veyro -V1 had been unnannounced.
Can we move this forward now?
Philipp.
>
> On Thu 8. Jun 2023 at 09:35, Kito Cheng via Gcc-patches <
> gcc-patches@gcc.gnu.org> wrote:
>
> > > diff --git a/gcc/config/riscv/riscv-cores.def
> > b/gcc/config/riscv/riscv-cores.def
> > > index 7d87ab7ce28..4078439e562 100644
> > > --- a/gcc/config/riscv/riscv-cores.def
> > > +++ b/gcc/config/riscv/riscv-cores.def
> > > @@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic,
> > rocket_tune_info)
> > > RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
> > > RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
> > > RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> > > +RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
> > > RISCV_TUNE("size", generic, optimize_size_tune_info)
> > >
> > > #undef RISCV_TUNE
> > > @@ -77,4 +78,7 @@ RISCV_CORE("thead-c906",
> > "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
> > > "xtheadcondmov_xtheadfmemidx_xtheadmac_"
> > > "xtheadmemidx_xtheadmempair_xtheadsync",
> > > "thead-c906")
> > > +
> > > +RISCV_CORE("veyron-v1",
> > "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
> > > + "veyron-v1")
> >
> > Seems like xventanacondops have not in the trunk yet, I saw Jeff has
> > approved before but not commit yet
>
>
> We couldn’t apply back then, as Veyro -V1 had been unnannounced.
> Can we move this forward now?
>
Oh, okay I got the awkness point...I am ok with that on gcc land, but I
would like binutils support that first, or remove the extension from the
mcpu for temporary before binutils support, otherwise it just a broken
support for that CPU on trunk gcc.
> Philipp.
>
> >
>
On 6/8/23 04:22, Kito Cheng wrote:
>
>
> Oh, okay I got the awkness point...I am ok with that on gcc land, but I
> would like binutils support that first, or remove the extension from the
> mcpu for temporary before binutils support, otherwise it just a broken
> support for that CPU on trunk gcc.
I pushed the binutils bits into the repo a couple months ago:
> commit 1656d3f8ef56a16745689c03269412988ebcaa54
> Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
> Date: Wed Apr 26 14:09:34 2023 -0600
>
> RISC-V: Support XVentanaCondOps extension
[ ... ]
I'd very much like to see the condops go into GCC as well, but I've been
hesitant to move it forward myself. We're still waiting on hardware and
it wasn't clear to me that we really had consensus agreement to move the
bits forward based on an announcement vs waiting on actual hardware
availability (based on the comments from Palmer when I upstreamed the
binutils bits).
IIRC there was general consensus on rewriting the lowest level
primitives as if-then-else constructs. Something like this:
> (define_code_iterator eq_or_ne [eq ne])
> (define_code_attr n [(eq "") (ne "n")])
> (define_code_attr rev [(eq "n") (ne "")])
>
> (define_insn "*vt.maskc<n><mode>"
> [(set (match_operand:X 0 "register_operand" "=r")
> (if_then_else:X
> (eq_or_ne (match_operand:X 1 "register_operand" "r")
> (const_int 0))
> (const_int 0)
> (match_operand:X 2 "register_operand" "r")))]
> "TARGET_XVENTANACONDOPS"
> "vt.maskc<n>\t%0,%2,%1")
>
> (define_insn "*vt.maskc<n><mode>_reverse"
> [(set (match_operand:X 0 "register_operand" "=r")
> (if_then_else:X
> (eq_or_ne (match_operand:X 1 "register_operand" "r")
> (const_int 0))
> (match_operand:X 2 "register_operand" "r")
> (const_int 0)))]
> "TARGET_XVENTANACONDOPS"
> "vt.maskc<rev>\t%0,%2,%1")
That's what we're using internally these days. I would expect zicond to
work in exactly the same manner, but with a different instruction being
emitted.
We've also got bits here which wire this up in the conditional move
expander and which adjust the ifcvt.cc bits from VRULL to use the
if-then-else form. All this will be useful for zicond as well.
I don't mind letting zicond go first. It's frozen so it ought to be
non-controversial. We can then upstream the various improvements to
utilize zicond better. That moves things forward in a meaningful manner
and buys time to meet the hardware requirement for xventanacondops which
will be trivial to add if zicond is already supported.
Jeff
On Thu 8. Jun 2023 at 16:17, Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
> On 6/8/23 04:22, Kito Cheng wrote:
>
> >
> >
> > Oh, okay I got the awkness point...I am ok with that on gcc land, but I
> > would like binutils support that first, or remove the extension from the
> > mcpu for temporary before binutils support, otherwise it just a broken
> > support for that CPU on trunk gcc.
> I pushed the binutils bits into the repo a couple months ago:
>
> > commit 1656d3f8ef56a16745689c03269412988ebcaa54
> > Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
> > Date: Wed Apr 26 14:09:34 2023 -0600
> >
> > RISC-V: Support XVentanaCondOps extension
> [ ... ]
>
> I'd very much like to see the condops go into GCC as well, but I've been
> hesitant to move it forward myself. We're still waiting on hardware and
> it wasn't clear to me that we really had consensus agreement to move the
> bits forward based on an announcement vs waiting on actual hardware
> availability (based on the comments from Palmer when I upstreamed the
> binutils bits).
Zicondops will go to ratification in the next couple of weeks, and the plan
is to revise the patches by then.
So I would propose that we move Zicond forward as that happens and (given
how small XVentanaCondOps is on-top of Zicond) we pick it up then.
> IIRC there was general consensus on rewriting the lowest level
That was part of the “moving forward”… this needs a rebase and a major
revision.
> primitives as if-then-else constructs. Something like this:
>
> > (define_code_iterator eq_or_ne [eq ne])
> > (define_code_attr n [(eq "") (ne "n")])
> > (define_code_attr rev [(eq "n") (ne "")])
> >
> > (define_insn "*vt.maskc<n><mode>"
> > [(set (match_operand:X 0 "register_operand" "=r")
> > (if_then_else:X
> > (eq_or_ne (match_operand:X 1 "register_operand" "r")
> > (const_int 0))
> > (const_int 0)
> > (match_operand:X 2 "register_operand" "r")))]
> > "TARGET_XVENTANACONDOPS"
> > "vt.maskc<n>\t%0,%2,%1")
> >
> > (define_insn "*vt.maskc<n><mode>_reverse"
> > [(set (match_operand:X 0 "register_operand" "=r")
> > (if_then_else:X
> > (eq_or_ne (match_operand:X 1 "register_operand" "r")
> > (const_int 0))
> > (match_operand:X 2 "register_operand" "r")
> > (const_int 0)))]
> > "TARGET_XVENTANACONDOPS"
> > "vt.maskc<rev>\t%0,%2,%1")
>
> That's what we're using internally these days. I would expect zicond to
> work in exactly the same manner, but with a different instruction being
> emitted.
>
> We've also got bits here which wire this up in the conditional move
> expander and which adjust the ifcvt.cc bits from VRULL to use the
> if-then-else form. All this will be useful for zicond as well.
>
> I don't mind letting zicond go first. It's frozen so it ought to be
> non-controversial. We can then upstream the various improvements to
> utilize zicond better. That moves things forward in a meaningful manner
> and buys time to meet the hardware requirement for xventanacondops which
> will be trivial to add if zicond is already supported.
>
>
>
>
> Jeff
>
> > I'd very much like to see the condops go into GCC as well, but I've been
> > hesitant to move it forward myself. We're still waiting on hardware and
> > it wasn't clear to me that we really had consensus agreement to move the
> > bits forward based on an announcement vs waiting on actual hardware
> > availability (based on the comments from Palmer when I upstreamed the
> > binutils bits).
My bad, apparently I grep wrong binutils so I thought it isn't in the tree yet,
I don't have strong opinion on that, so I would defer this to Palmer.
@@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
+RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
RISCV_TUNE("size", generic, optimize_size_tune_info)
#undef RISCV_TUNE
@@ -77,4 +78,7 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
"xtheadcondmov_xtheadfmemidx_xtheadmac_"
"xtheadmemidx_xtheadmempair_xtheadsync",
"thead-c906")
+
+RISCV_CORE("veyron-v1", "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
+ "veyron-v1")
#undef RISCV_CORE
@@ -52,7 +52,8 @@ extern enum riscv_isa_spec_class riscv_isa_spec;
/* Keep this list in sync with define_attr "tune" in riscv.md. */
enum riscv_microarchitecture_type {
generic,
- sifive_7
+ sifive_7,
+ veyron_v1
};
extern enum riscv_microarchitecture_type riscv_microarchitecture;
@@ -366,6 +366,21 @@ static const struct riscv_tune_param thead_c906_tune_info = {
false /* use_divmod_expansion */
};
+/* Costs to use when optimizing for Ventana Micro Veyron V1. */
+static const struct riscv_tune_param veyron_v1_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (9), COSTS_N_INSNS (17)}, /* fp_div */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
+ {COSTS_N_INSNS (12), COSTS_N_INSNS (20)}, /* int_div */
+ 4, /* issue_rate */
+ 4, /* branch_cost */
+ 5, /* memory_cost */
+ 8, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ true, /* use_divmod_expansion */
+};
+
/* Costs to use when optimizing for size. */
static const struct riscv_tune_param optimize_size_tune_info = {
{COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */
@@ -482,7 +482,7 @@
;; Microarchitectures we know how to tune for.
;; Keep this in sync with enum riscv_microarchitecture.
(define_attr "tune"
- "generic,sifive_7"
+ "generic,sifive_7,veyron_v1"
(const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
;; Describe a user's asm statement.
@@ -3123,3 +3123,4 @@
(include "sifive-7.md")
(include "thead.md")
(include "vector.md")
+(include "veyron-v1.md")
new file mode 100644
@@ -0,0 +1,121 @@
+;; Scheduling pipeline description for Veyron V1 RISC-V.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_automaton "veyron_v1")
+
+;; 5 conceptual units of the processor:
+;; The 4 symmetric ALUs
+;; The execution FPU (fadd, fmul, fmadd, comparisons, etc)
+;; The data transfer FPU
+;; The shared multi-cycle ALU for integer mul, div, etc
+;; The fdiv/fsqrt unit in the FPU
+
+(define_cpu_unit "ixu0_v1,ixu1_v1,ixu2_v1,ixu3_v1" "veyron_v1")
+(define_cpu_unit "veyron_v1_fxu" "veyron_v1")
+(define_cpu_unit "veyron_v1_fxu_xfer" "veyron_v1")
+
+(define_cpu_unit "veyron_v1_multi" "veyron_v1")
+(define_cpu_unit "veyron_v1_div" "veyron_v1")
+
+;; Shortcut for reserving one of the symmetric ALU units.
+(define_reservation "veyron_v1_ixu"
+ "ixu0_v1|ixu1_v1|ixu2_v1|ixu3_v1")
+
+(define_insn_reservation "veyron_v1_alu" 2
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,move,bitmanip,min,max,minu,maxu,clz,ctz"))
+ "veyron_v1_ixu")
+
+(define_insn_reservation "veyron_v1_store" 1
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "store"))
+ "veyron_v1_ixu")
+
+(define_insn_reservation "veyron_v1_ixu" 4
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "load"))
+ "veyron_v1_ixu")
+
+(define_insn_reservation "veyron_v1_fpload" 6
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "fpload"))
+ "veyron_v1_ixu")
+
+(define_insn_reservation "veyron_v1_xfer" 4
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "mfc,mtc"))
+ "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*3")
+
+(define_insn_reservation "veyron_v1_fpstore" 1
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "fpstore"))
+ "veyron_v1_ixu+veyron_v1_fxu_xfer")
+
+(define_insn_reservation "veyron_v1_fmove" 2
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "fmove"))
+ "veyron_v1_ixu+veyron_v1_fxu_xfer")
+
+(define_insn_reservation "veyron_v1_fcvt" 5
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "fcvt"))
+ "veyron_v1_ixu+veyron_v1_fxu_xfer")
+
+(define_insn_reservation "veyron_v1_fpcmp" 2
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "fcmp"))
+ "veyron_v1_ixu+veyron_v1_fxu")
+
+(define_insn_reservation "veyron_v1_imul" 4
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "imul"))
+ "veyron_v1_ixu+veyron_v1_multi")
+
+(define_insn_reservation "veyron_v1_idiv" 20
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "idiv"))
+ "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*19")
+
+(define_insn_reservation "veyron_v1_fpa" 3
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "fadd,fmul"))
+ "veyron_v1_ixu+veyron_v1_fxu")
+
+(define_insn_reservation "veyron_v1_fmadd" 5
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "fmadd"))
+ "veyron_v1_ixu+veyron_v1_fxu")
+
+(define_insn_reservation "veyron_v1_fpdivsqrt_single" 9
+ (and (eq_attr "tune" "veyron_v1")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "veyron_v1_ixu+veyron_v1_fxu+veyron_v1_div,veyron_v1_div*8")
+
+(define_insn_reservation "veyron_v1_fpdivsqrt_double" 17
+ (and (eq_attr "tune" "veyron_v1")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "veyron_v1_ixu+veyron_v1_fxu+veyron_v1_div,veyron_v1_div*16")
+
+(define_insn_reservation "veyron_v1_popcount" 4
+ (and (eq_attr "tune" "veyron_v1")
+ (eq_attr "type" "cpop"))
+ "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*3")
@@ -28995,14 +28995,15 @@ by particular CPU name.
Permissible values for this option are: @samp{sifive-e20}, @samp{sifive-e21},
@samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34}, @samp{sifive-e76},
@samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54}, @samp{sifive-s76},
-@samp{sifive-u54}, and @samp{sifive-u74}.
+@samp{sifive-u54}, @samp{sifive-u74} and @samp{veyron-v1}.
@opindex mtune
@item -mtune=@var{processor-string}
Optimize the output for the given processor, specified by microarchitecture or
particular CPU name. Permissible values for this option are: @samp{rocket},
@samp{sifive-3-series}, @samp{sifive-5-series}, @samp{sifive-7-series},
-@samp{thead-c906}, @samp{size}, and all valid options for @option{-mcpu=}.
+@samp{thead-c906}, @samp{veyron-v1} and @samp{size}, and all valid options for
+@option{-mcpu=}.
When @option{-mtune=} is not specified, use the setting from @option{-mcpu},
the default is @samp{rocket} if both are not specified.
@@ -1,7 +1,6 @@
/* { dg-do compile } */
-/* Skip this everywhere for now. Once we have a target with
- divmod enabled, only skip for -O0, -O1, -Og, -Oz, -Os. */
-/* { dg-skip-if "" { *-*-* } { } } */
+/* { dg-options "-mtune=veyron-v1 -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Oz" "-Os" } } */
void
foo(int a, int b, int *c, int *d)