RISC-V: Add Veyron V1 pipeline description

Message ID 20230607131749.82794-1-rzinsly@ventanamicro.com
State Accepted
Headers
Series RISC-V: Add Veyron V1 pipeline description |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Raphael Moreira Zinsly June 7, 2023, 1:17 p.m. UTC
  gcc/ChangeLog:

	* config/riscv/riscv-cores.def: Add veyron-v1
	core and tune info.
	* config/riscv/riscv-opts.h
	(riscv_microarchitecture_type): Add veyron-v1.
	* config/riscv/riscv.cc (veyron_v1_tune_info): New.
	* config/riscv/riscv.md: Include veyron-v1.md.
	(tune): Add veyron-v1.
	* config/riscv/veyron-v1.md: New file.
	* doc/invoke.texi (mcpu): Add veyron-v1.
	(mtune): Add veyron-v1.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/divmod-2.c: Enable test for veyron-v1.
---
 gcc/config/riscv/riscv-cores.def          |   4 +
 gcc/config/riscv/riscv-opts.h             |   3 +-
 gcc/config/riscv/riscv.cc                 |  15 +++
 gcc/config/riscv/riscv.md                 |   3 +-
 gcc/config/riscv/veyron-v1.md             | 121 ++++++++++++++++++++++
 gcc/doc/invoke.texi                       |   5 +-
 gcc/testsuite/gcc.target/riscv/divmod-2.c |   5 +-
 7 files changed, 149 insertions(+), 7 deletions(-)
 create mode 100644 gcc/config/riscv/veyron-v1.md
  

Comments

Kito Cheng June 7, 2023, 2:13 p.m. UTC | #1
I would like vendor cpu name start with vendor name, like ventana-veyron-v1
which is consistent with all other vendor cpu, and llvm are using same
convention too.

Raphael Moreira Zinsly <rzinsly@ventanamicro.com>於 2023年6月7日 週三,21:18寫道:

> gcc/ChangeLog:
>
>         * config/riscv/riscv-cores.def: Add veyron-v1
>         core and tune info.
>         * config/riscv/riscv-opts.h
>         (riscv_microarchitecture_type): Add veyron-v1.
>         * config/riscv/riscv.cc (veyron_v1_tune_info): New.
>         * config/riscv/riscv.md: Include veyron-v1.md.
>         (tune): Add veyron-v1.
>         * config/riscv/veyron-v1.md: New file.
>         * doc/invoke.texi (mcpu): Add veyron-v1.
>         (mtune): Add veyron-v1.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/divmod-2.c: Enable test for veyron-v1.
> ---
>  gcc/config/riscv/riscv-cores.def          |   4 +
>  gcc/config/riscv/riscv-opts.h             |   3 +-
>  gcc/config/riscv/riscv.cc                 |  15 +++
>  gcc/config/riscv/riscv.md                 |   3 +-
>  gcc/config/riscv/veyron-v1.md             | 121 ++++++++++++++++++++++
>  gcc/doc/invoke.texi                       |   5 +-
>  gcc/testsuite/gcc.target/riscv/divmod-2.c |   5 +-
>  7 files changed, 149 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/config/riscv/veyron-v1.md
>
> diff --git a/gcc/config/riscv/riscv-cores.def
> b/gcc/config/riscv/riscv-cores.def
> index 7d87ab7ce28..4078439e562 100644
> --- a/gcc/config/riscv/riscv-cores.def
> +++ b/gcc/config/riscv/riscv-cores.def
> @@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
>  RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
>  RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
>  RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> +RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
>  RISCV_TUNE("size", generic, optimize_size_tune_info)
>
>  #undef RISCV_TUNE
> @@ -77,4 +78,7 @@ RISCV_CORE("thead-c906",
> "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
>                               "xtheadcondmov_xtheadfmemidx_xtheadmac_"
>                               "xtheadmemidx_xtheadmempair_xtheadsync",
>                               "thead-c906")
> +
> +RISCV_CORE("veyron-v1",
>  "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
> +                             "veyron-v1")
>  #undef RISCV_CORE
> diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
> index f34ca993689..8f7dd84115f 100644
> --- a/gcc/config/riscv/riscv-opts.h
> +++ b/gcc/config/riscv/riscv-opts.h
> @@ -52,7 +52,8 @@ extern enum riscv_isa_spec_class riscv_isa_spec;
>  /* Keep this list in sync with define_attr "tune" in riscv.md.  */
>  enum riscv_microarchitecture_type {
>    generic,
> -  sifive_7
> +  sifive_7,
> +  veyron_v1
>  };
>  extern enum riscv_microarchitecture_type riscv_microarchitecture;
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 3954fc07a8b..6a5e89b4813 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -366,6 +366,21 @@ static const struct riscv_tune_param
> thead_c906_tune_info = {
>    false                /* use_divmod_expansion */
>  };
>
> +/* Costs to use when optimizing for Ventana Micro Veyron V1.  */
> +static const struct riscv_tune_param veyron_v1_tune_info = {
> +  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},      /* fp_add */
> +  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},      /* fp_mul */
> +  {COSTS_N_INSNS (9), COSTS_N_INSNS (17)},     /* fp_div */
> +  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},      /* int_mul */
> +  {COSTS_N_INSNS (12), COSTS_N_INSNS (20)},    /* int_div */
> +  4,                                           /* issue_rate */
> +  4,                                           /* branch_cost */
> +  5,                                           /* memory_cost */
> +  8,                                           /* fmv_cost */
> +  false,                                       /* slow_unaligned_access */
> +  true,                                        /* use_divmod_expansion */
> +};
> +
>  /* Costs to use when optimizing for size.  */
>  static const struct riscv_tune_param optimize_size_tune_info = {
>    {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},      /* fp_add */
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 124d8c95804..90f0c1b1cf1 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -482,7 +482,7 @@
>  ;; Microarchitectures we know how to tune for.
>  ;; Keep this in sync with enum riscv_microarchitecture.
>  (define_attr "tune"
> -  "generic,sifive_7"
> +  "generic,sifive_7,veyron_v1"
>    (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
>
>  ;; Describe a user's asm statement.
> @@ -3123,3 +3123,4 @@
>  (include "sifive-7.md")
>  (include "thead.md")
>  (include "vector.md")
> +(include "veyron-v1.md")
> diff --git a/gcc/config/riscv/veyron-v1.md b/gcc/config/riscv/veyron-v1.md
> new file mode 100644
> index 00000000000..3eeff76d9b0
> --- /dev/null
> +++ b/gcc/config/riscv/veyron-v1.md
> @@ -0,0 +1,121 @@
> +;; Scheduling pipeline description for Veyron V1 RISC-V.
> +;; Copyright (C) 2023 Free Software Foundation, Inc.
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify it
> +;; under the terms of the GNU General Public License as published
> +;; by the Free Software Foundation; either version 3, or (at your
> +;; option) any later version.
> +
> +;; GCC is distributed in the hope that it will be useful, but WITHOUT
> +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> +;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> +;; License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +
> +(define_automaton "veyron_v1")
> +
> +;; 5 conceptual units of the processor:
> +;;   The 4 symmetric ALUs
> +;;   The execution FPU (fadd, fmul, fmadd, comparisons, etc)
> +;;   The data transfer FPU
> +;;   The shared multi-cycle ALU for integer mul, div, etc
> +;;   The fdiv/fsqrt unit in the FPU
> +
> +(define_cpu_unit "ixu0_v1,ixu1_v1,ixu2_v1,ixu3_v1" "veyron_v1")
> +(define_cpu_unit "veyron_v1_fxu" "veyron_v1")
> +(define_cpu_unit "veyron_v1_fxu_xfer" "veyron_v1")
> +
> +(define_cpu_unit "veyron_v1_multi" "veyron_v1")
> +(define_cpu_unit "veyron_v1_div" "veyron_v1")
> +
> +;; Shortcut for reserving one of the symmetric ALU units.
> +(define_reservation "veyron_v1_ixu"
> +                   "ixu0_v1|ixu1_v1|ixu2_v1|ixu3_v1")
> +
> +(define_insn_reservation "veyron_v1_alu" 2
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type"
> "unknown,const,arith,shift,slt,multi,auipc,nop,logical,move,bitmanip,min,max,minu,maxu,clz,ctz"))
> +  "veyron_v1_ixu")
> +
> +(define_insn_reservation "veyron_v1_store" 1
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "store"))
> +  "veyron_v1_ixu")
> +
> +(define_insn_reservation "veyron_v1_ixu" 4
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "load"))
> +  "veyron_v1_ixu")
> +
> +(define_insn_reservation "veyron_v1_fpload" 6
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "fpload"))
> +  "veyron_v1_ixu")
> +
> +(define_insn_reservation "veyron_v1_xfer" 4
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "mfc,mtc"))
> +  "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*3")
> +
> +(define_insn_reservation "veyron_v1_fpstore" 1
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "fpstore"))
> +  "veyron_v1_ixu+veyron_v1_fxu_xfer")
> +
> +(define_insn_reservation "veyron_v1_fmove" 2
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "fmove"))
> +  "veyron_v1_ixu+veyron_v1_fxu_xfer")
> +
> +(define_insn_reservation "veyron_v1_fcvt" 5
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "fcvt"))
> +  "veyron_v1_ixu+veyron_v1_fxu_xfer")
> +
> +(define_insn_reservation "veyron_v1_fpcmp" 2
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "fcmp"))
> +  "veyron_v1_ixu+veyron_v1_fxu")
> +
> +(define_insn_reservation "veyron_v1_imul" 4
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "imul"))
> +  "veyron_v1_ixu+veyron_v1_multi")
> +
> +(define_insn_reservation "veyron_v1_idiv" 20
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "idiv"))
> +  "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*19")
> +
> +(define_insn_reservation "veyron_v1_fpa" 3
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "fadd,fmul"))
> +  "veyron_v1_ixu+veyron_v1_fxu")
> +
> +(define_insn_reservation "veyron_v1_fmadd" 5
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "fmadd"))
> +  "veyron_v1_ixu+veyron_v1_fxu")
> +
> +(define_insn_reservation "veyron_v1_fpdivsqrt_single" 9
> +  (and (eq_attr "tune" "veyron_v1")
> +       (and (eq_attr "type" "fdiv,fsqrt")
> +               (eq_attr "mode" "SF")))
> +  "veyron_v1_ixu+veyron_v1_fxu+veyron_v1_div,veyron_v1_div*8")
> +
> +(define_insn_reservation "veyron_v1_fpdivsqrt_double" 17
> +  (and (eq_attr "tune" "veyron_v1")
> +       (and (eq_attr "type" "fdiv,fsqrt")
> +               (eq_attr "mode" "DF")))
> +  "veyron_v1_ixu+veyron_v1_fxu+veyron_v1_div,veyron_v1_div*16")
> +
> +(define_insn_reservation "veyron_v1_popcount" 4
> +  (and (eq_attr "tune" "veyron_v1")
> +       (eq_attr "type" "cpop"))
> +  "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*3")
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 898a88ce33e..b163be7ec5e 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -28995,14 +28995,15 @@ by particular CPU name.
>  Permissible values for this option are: @samp{sifive-e20},
> @samp{sifive-e21},
>  @samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34},
> @samp{sifive-e76},
>  @samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54},
> @samp{sifive-s76},
> -@samp{sifive-u54}, and @samp{sifive-u74}.
> +@samp{sifive-u54}, @samp{sifive-u74} and @samp{veyron-v1}.
>
>  @opindex mtune
>  @item -mtune=@var{processor-string}
>  Optimize the output for the given processor, specified by
> microarchitecture or
>  particular CPU name.  Permissible values for this option are:
> @samp{rocket},
>  @samp{sifive-3-series}, @samp{sifive-5-series}, @samp{sifive-7-series},
> -@samp{thead-c906}, @samp{size}, and all valid options for @option{-mcpu=}.
> +@samp{thead-c906}, @samp{veyron-v1} and @samp{size}, and all valid
> options for
> +@option{-mcpu=}.
>
>  When @option{-mtune=} is not specified, use the setting from
> @option{-mcpu},
>  the default is @samp{rocket} if both are not specified.
> diff --git a/gcc/testsuite/gcc.target/riscv/divmod-2.c
> b/gcc/testsuite/gcc.target/riscv/divmod-2.c
> index dfd319e52c0..67330f88cf8 100644
> --- a/gcc/testsuite/gcc.target/riscv/divmod-2.c
> +++ b/gcc/testsuite/gcc.target/riscv/divmod-2.c
> @@ -1,7 +1,6 @@
>  /* { dg-do compile } */
> -/* Skip this everywhere for now.  Once we have a target with
> -   divmod enabled, only skip for -O0, -O1, -Og, -Oz, -Os.  */
> -/* { dg-skip-if "" { *-*-* } { } } */
> +/* { dg-options "-mtune=veyron-v1 -mabi=lp64" } */
> +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Oz" "-Os" } } */
>
>  void
>  foo(int a, int b, int *c, int *d)
> --
> 2.40.1
>
>
  
Jeff Law June 7, 2023, 2:43 p.m. UTC | #2
On 6/7/23 08:13, Kito Cheng wrote:
> I would like vendor cpu name start with vendor name, like 
> ventana-veyron-v1 which is consistent with all other vendor cpu, and 
> llvm are using same convention too.
Fair enough.  Better to get it right now than have this stuff be 
inconsistent.  It'll be a little more pain for our internal folks, but 
we'll deal with that :-)

Jeff
  
Jeff Law June 7, 2023, 9:06 p.m. UTC | #3
On 6/7/23 08:43, Jeff Law wrote:
> 
> 
> On 6/7/23 08:13, Kito Cheng wrote:
>> I would like vendor cpu name start with vendor name, like 
>> ventana-veyron-v1 which is consistent with all other vendor cpu, and 
>> llvm are using same convention too.
> Fair enough.  Better to get it right now than have this stuff be 
> inconsistent.  It'll be a little more pain for our internal folks, but 
> we'll deal with that :-)
I should have also noted that this seems to get a pretty consistent 1-2% 
improvement across spec2017.  Not surprisingly it reduces stalls at the 
retirement unit due to instructions not being completed.  We can see 
impacts elsewhere like fewer stalls due to conflicting resources at the 
dispatch stage.

It does make it more likely that we'll blow out the register file on 
x264's key SATD routine which shows up as a single digit regression for 
input #1.  The fix there is pretty simple, use register pressure 
scheduling, which we'll have some hard data on relatively soon.

jeff
  
Kito Cheng June 8, 2023, 7:33 a.m. UTC | #4
> diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
> index 7d87ab7ce28..4078439e562 100644
> --- a/gcc/config/riscv/riscv-cores.def
> +++ b/gcc/config/riscv/riscv-cores.def
> @@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
>  RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
>  RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
>  RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> +RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
>  RISCV_TUNE("size", generic, optimize_size_tune_info)
>
>  #undef RISCV_TUNE
> @@ -77,4 +78,7 @@ RISCV_CORE("thead-c906",      "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
>                               "xtheadcondmov_xtheadfmemidx_xtheadmac_"
>                               "xtheadmemidx_xtheadmempair_xtheadsync",
>                               "thead-c906")
> +
> +RISCV_CORE("veyron-v1",       "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
> +                             "veyron-v1")

Seems like xventanacondops have not in the trunk yet, I saw Jeff has
approved before but not commit yet

https://patchwork.ozlabs.org/project/gcc/patch/20230210224150.2801962-11-philipp.tomsich@vrull.eu/
  
Philipp Tomsich June 8, 2023, 9:58 a.m. UTC | #5
On Thu 8. Jun 2023 at 09:35, Kito Cheng via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

> > diff --git a/gcc/config/riscv/riscv-cores.def
> b/gcc/config/riscv/riscv-cores.def
> > index 7d87ab7ce28..4078439e562 100644
> > --- a/gcc/config/riscv/riscv-cores.def
> > +++ b/gcc/config/riscv/riscv-cores.def
> > @@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic,
> rocket_tune_info)
> >  RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
> >  RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
> >  RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> > +RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
> >  RISCV_TUNE("size", generic, optimize_size_tune_info)
> >
> >  #undef RISCV_TUNE
> > @@ -77,4 +78,7 @@ RISCV_CORE("thead-c906",
> "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
> >                               "xtheadcondmov_xtheadfmemidx_xtheadmac_"
> >                               "xtheadmemidx_xtheadmempair_xtheadsync",
> >                               "thead-c906")
> > +
> > +RISCV_CORE("veyron-v1",
>  "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
> > +                             "veyron-v1")
>
> Seems like xventanacondops have not in the trunk yet, I saw Jeff has
> approved before but not commit yet


We couldn’t apply back then, as Veyro -V1 had been unnannounced.
Can we move this forward now?

Philipp.

>
  
Kito Cheng June 8, 2023, 10:22 a.m. UTC | #6
> On Thu 8. Jun 2023 at 09:35, Kito Cheng via Gcc-patches <
> gcc-patches@gcc.gnu.org> wrote:
>
> > > diff --git a/gcc/config/riscv/riscv-cores.def
> > b/gcc/config/riscv/riscv-cores.def
> > > index 7d87ab7ce28..4078439e562 100644
> > > --- a/gcc/config/riscv/riscv-cores.def
> > > +++ b/gcc/config/riscv/riscv-cores.def
> > > @@ -38,6 +38,7 @@ RISCV_TUNE("sifive-3-series", generic,
> > rocket_tune_info)
> > >  RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
> > >  RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
> > >  RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> > > +RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
> > >  RISCV_TUNE("size", generic, optimize_size_tune_info)
> > >
> > >  #undef RISCV_TUNE
> > > @@ -77,4 +78,7 @@ RISCV_CORE("thead-c906",
> > "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
> > >                               "xtheadcondmov_xtheadfmemidx_xtheadmac_"
> > >                               "xtheadmemidx_xtheadmempair_xtheadsync",
> > >                               "thead-c906")
> > > +
> > > +RISCV_CORE("veyron-v1",
> >  "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
> > > +                             "veyron-v1")
> >
> > Seems like xventanacondops have not in the trunk yet, I saw Jeff has
> > approved before but not commit yet
>
>
> We couldn’t apply back then, as Veyro -V1 had been unnannounced.
> Can we move this forward now?
>

Oh, okay I got the awkness point...I am ok with that on gcc land, but I
would like binutils support that first, or remove the extension from the
mcpu for temporary before binutils support, otherwise it just a broken
support for that CPU on trunk gcc.




> Philipp.
>
> >
>
  
Jeff Law June 8, 2023, 2:17 p.m. UTC | #7
On 6/8/23 04:22, Kito Cheng wrote:

> 
> 
> Oh, okay I got the awkness point...I am ok with that on gcc land, but I 
> would like binutils support that first, or remove the extension from the 
> mcpu for temporary before binutils support, otherwise it just a broken 
> support for that CPU on trunk gcc.
I pushed the binutils bits into the repo a couple months ago:

> commit 1656d3f8ef56a16745689c03269412988ebcaa54
> Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
> Date:   Wed Apr 26 14:09:34 2023 -0600
> 
>         RISC-V: Support XVentanaCondOps extension
[ ... ]

I'd very much like to see the condops go into GCC as well, but I've been 
hesitant to move it forward myself.  We're still waiting on hardware and 
it wasn't clear to me that we really had consensus agreement to move the 
bits forward based on an announcement vs waiting on actual hardware 
availability (based on the comments from Palmer when I upstreamed the 
binutils bits).

IIRC there was general consensus on rewriting the lowest level 
primitives as if-then-else constructs.  Something like this:

> (define_code_iterator eq_or_ne [eq ne])
> (define_code_attr n [(eq "") (ne "n")])
> (define_code_attr rev [(eq "n") (ne "")])
> 
> (define_insn "*vt.maskc<n><mode>"
>   [(set (match_operand:X 0 "register_operand" "=r")
>     (if_then_else:X
>      (eq_or_ne (match_operand:X 1 "register_operand" "r")
>                  (const_int 0))
>      (const_int 0)
>      (match_operand:X 2 "register_operand" "r")))]
>   "TARGET_XVENTANACONDOPS"
>   "vt.maskc<n>\t%0,%2,%1")
> 
> (define_insn "*vt.maskc<n><mode>_reverse"
>   [(set (match_operand:X 0 "register_operand" "=r")
>     (if_then_else:X
>      (eq_or_ne (match_operand:X 1 "register_operand" "r")
>                  (const_int 0))
>      (match_operand:X 2 "register_operand" "r")
>      (const_int 0)))]
>   "TARGET_XVENTANACONDOPS"
>   "vt.maskc<rev>\t%0,%2,%1")

That's what we're using internally these days.  I would expect zicond to 
work in exactly the same manner, but with a different instruction being 
emitted.

We've also got bits here which wire this up in the conditional move 
expander and which adjust the ifcvt.cc bits from VRULL to use the 
if-then-else form.  All this will be useful for zicond as well.

I don't mind letting zicond go first.  It's frozen so it ought to be 
non-controversial.  We can then upstream the various improvements to 
utilize zicond better.  That moves things forward in a meaningful manner 
and buys time to meet the hardware requirement for xventanacondops which 
will be trivial to add if zicond is already supported.




Jeff
  
Philipp Tomsich June 8, 2023, 2:41 p.m. UTC | #8
On Thu 8. Jun 2023 at 16:17, Jeff Law <jeffreyalaw@gmail.com> wrote:

>
>
> On 6/8/23 04:22, Kito Cheng wrote:
>
> >
> >
> > Oh, okay I got the awkness point...I am ok with that on gcc land, but I
> > would like binutils support that first, or remove the extension from the
> > mcpu for temporary before binutils support, otherwise it just a broken
> > support for that CPU on trunk gcc.
> I pushed the binutils bits into the repo a couple months ago:
>
> > commit 1656d3f8ef56a16745689c03269412988ebcaa54
> > Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
> > Date:   Wed Apr 26 14:09:34 2023 -0600
> >
> >         RISC-V: Support XVentanaCondOps extension
> [ ... ]
>
> I'd very much like to see the condops go into GCC as well, but I've been
> hesitant to move it forward myself.  We're still waiting on hardware and
> it wasn't clear to me that we really had consensus agreement to move the
> bits forward based on an announcement vs waiting on actual hardware
> availability (based on the comments from Palmer when I upstreamed the
> binutils bits).


Zicondops will go to ratification in the next couple of weeks, and the plan
is to revise the patches by then.

So I would propose that we move Zicond forward as that happens and (given
how small XVentanaCondOps is on-top of Zicond) we pick it up then.


> IIRC there was general consensus on rewriting the lowest level


That was part of the “moving forward”… this needs a rebase and a major
revision.


> primitives as if-then-else constructs.  Something like this:
>
> > (define_code_iterator eq_or_ne [eq ne])
> > (define_code_attr n [(eq "") (ne "n")])
> > (define_code_attr rev [(eq "n") (ne "")])
> >
> > (define_insn "*vt.maskc<n><mode>"
> >   [(set (match_operand:X 0 "register_operand" "=r")
> >     (if_then_else:X
> >      (eq_or_ne (match_operand:X 1 "register_operand" "r")
> >                  (const_int 0))
> >      (const_int 0)
> >      (match_operand:X 2 "register_operand" "r")))]
> >   "TARGET_XVENTANACONDOPS"
> >   "vt.maskc<n>\t%0,%2,%1")
> >
> > (define_insn "*vt.maskc<n><mode>_reverse"
> >   [(set (match_operand:X 0 "register_operand" "=r")
> >     (if_then_else:X
> >      (eq_or_ne (match_operand:X 1 "register_operand" "r")
> >                  (const_int 0))
> >      (match_operand:X 2 "register_operand" "r")
> >      (const_int 0)))]
> >   "TARGET_XVENTANACONDOPS"
> >   "vt.maskc<rev>\t%0,%2,%1")
>
> That's what we're using internally these days.  I would expect zicond to
> work in exactly the same manner, but with a different instruction being
> emitted.
>
> We've also got bits here which wire this up in the conditional move
> expander and which adjust the ifcvt.cc bits from VRULL to use the
> if-then-else form.  All this will be useful for zicond as well.
>
> I don't mind letting zicond go first.  It's frozen so it ought to be
> non-controversial.  We can then upstream the various improvements to
> utilize zicond better.  That moves things forward in a meaningful manner
> and buys time to meet the hardware requirement for xventanacondops which
> will be trivial to add if zicond is already supported.
>
>
>
>
> Jeff
>
  
Kito Cheng June 8, 2023, 3:18 p.m. UTC | #9
> > I'd very much like to see the condops go into GCC as well, but I've been
> > hesitant to move it forward myself.  We're still waiting on hardware and
> > it wasn't clear to me that we really had consensus agreement to move the
> > bits forward based on an announcement vs waiting on actual hardware
> > availability (based on the comments from Palmer when I upstreamed the
> > binutils bits).

My bad, apparently I grep wrong binutils so I thought it isn't in the tree yet,
I don't have strong opinion on that, so I would defer this to Palmer.
  

Patch

diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 7d87ab7ce28..4078439e562 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -38,6 +38,7 @@  RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
 RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
 RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
 RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
+RISCV_TUNE("veyron-v1", veyron_v1, veyron_v1_tune_info)
 RISCV_TUNE("size", generic, optimize_size_tune_info)
 
 #undef RISCV_TUNE
@@ -77,4 +78,7 @@  RISCV_CORE("thead-c906",      "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
 			      "xtheadcondmov_xtheadfmemidx_xtheadmac_"
 			      "xtheadmemidx_xtheadmempair_xtheadsync",
 			      "thead-c906")
+
+RISCV_CORE("veyron-v1",       "rv64imafdc_zba_zbb_zbc_zbs_zifencei_xventanacondops",
+			      "veyron-v1")
 #undef RISCV_CORE
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index f34ca993689..8f7dd84115f 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -52,7 +52,8 @@  extern enum riscv_isa_spec_class riscv_isa_spec;
 /* Keep this list in sync with define_attr "tune" in riscv.md.  */
 enum riscv_microarchitecture_type {
   generic,
-  sifive_7
+  sifive_7,
+  veyron_v1
 };
 extern enum riscv_microarchitecture_type riscv_microarchitecture;
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3954fc07a8b..6a5e89b4813 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -366,6 +366,21 @@  static const struct riscv_tune_param thead_c906_tune_info = {
   false		/* use_divmod_expansion */
 };
 
+/* Costs to use when optimizing for Ventana Micro Veyron V1.  */
+static const struct riscv_tune_param veyron_v1_tune_info = {
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
+  {COSTS_N_INSNS (9), COSTS_N_INSNS (17)},	/* fp_div */
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
+  {COSTS_N_INSNS (12), COSTS_N_INSNS (20)},	/* int_div */
+  4,						/* issue_rate */
+  4,						/* branch_cost */
+  5,						/* memory_cost */
+  8,						/* fmv_cost */
+  false,					/* slow_unaligned_access */
+  true,					/* use_divmod_expansion */
+};
+
 /* Costs to use when optimizing for size.  */
 static const struct riscv_tune_param optimize_size_tune_info = {
   {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_add */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 124d8c95804..90f0c1b1cf1 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -482,7 +482,7 @@ 
 ;; Microarchitectures we know how to tune for.
 ;; Keep this in sync with enum riscv_microarchitecture.
 (define_attr "tune"
-  "generic,sifive_7"
+  "generic,sifive_7,veyron_v1"
   (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
 
 ;; Describe a user's asm statement.
@@ -3123,3 +3123,4 @@ 
 (include "sifive-7.md")
 (include "thead.md")
 (include "vector.md")
+(include "veyron-v1.md")
diff --git a/gcc/config/riscv/veyron-v1.md b/gcc/config/riscv/veyron-v1.md
new file mode 100644
index 00000000000..3eeff76d9b0
--- /dev/null
+++ b/gcc/config/riscv/veyron-v1.md
@@ -0,0 +1,121 @@ 
+;; Scheduling pipeline description for Veyron V1 RISC-V.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_automaton "veyron_v1")
+
+;; 5 conceptual units of the processor:
+;;   The 4 symmetric ALUs
+;;   The execution FPU (fadd, fmul, fmadd, comparisons, etc)
+;;   The data transfer FPU
+;;   The shared multi-cycle ALU for integer mul, div, etc
+;;   The fdiv/fsqrt unit in the FPU
+
+(define_cpu_unit "ixu0_v1,ixu1_v1,ixu2_v1,ixu3_v1" "veyron_v1")
+(define_cpu_unit "veyron_v1_fxu" "veyron_v1")
+(define_cpu_unit "veyron_v1_fxu_xfer" "veyron_v1")
+
+(define_cpu_unit "veyron_v1_multi" "veyron_v1")
+(define_cpu_unit "veyron_v1_div" "veyron_v1")
+
+;; Shortcut for reserving one of the symmetric ALU units.
+(define_reservation "veyron_v1_ixu"
+		    "ixu0_v1|ixu1_v1|ixu2_v1|ixu3_v1")
+
+(define_insn_reservation "veyron_v1_alu" 2
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,move,bitmanip,min,max,minu,maxu,clz,ctz"))
+  "veyron_v1_ixu")
+
+(define_insn_reservation "veyron_v1_store" 1
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "store"))
+  "veyron_v1_ixu")
+
+(define_insn_reservation "veyron_v1_ixu" 4
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "load"))
+  "veyron_v1_ixu")
+
+(define_insn_reservation "veyron_v1_fpload" 6
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "fpload"))
+  "veyron_v1_ixu")
+
+(define_insn_reservation "veyron_v1_xfer" 4
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "mfc,mtc"))
+  "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*3")
+
+(define_insn_reservation "veyron_v1_fpstore" 1
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "fpstore"))
+  "veyron_v1_ixu+veyron_v1_fxu_xfer")
+
+(define_insn_reservation "veyron_v1_fmove" 2
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "fmove"))
+  "veyron_v1_ixu+veyron_v1_fxu_xfer")
+
+(define_insn_reservation "veyron_v1_fcvt" 5
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "fcvt"))
+  "veyron_v1_ixu+veyron_v1_fxu_xfer")
+
+(define_insn_reservation "veyron_v1_fpcmp" 2
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "fcmp"))
+  "veyron_v1_ixu+veyron_v1_fxu")
+
+(define_insn_reservation "veyron_v1_imul" 4
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "imul"))
+  "veyron_v1_ixu+veyron_v1_multi")
+
+(define_insn_reservation "veyron_v1_idiv" 20
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "idiv"))
+  "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*19")
+
+(define_insn_reservation "veyron_v1_fpa" 3
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "fadd,fmul"))
+  "veyron_v1_ixu+veyron_v1_fxu")
+
+(define_insn_reservation "veyron_v1_fmadd" 5
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "fmadd"))
+  "veyron_v1_ixu+veyron_v1_fxu")
+
+(define_insn_reservation "veyron_v1_fpdivsqrt_single" 9
+  (and (eq_attr "tune" "veyron_v1")
+       (and (eq_attr "type" "fdiv,fsqrt")
+	        (eq_attr "mode" "SF")))
+  "veyron_v1_ixu+veyron_v1_fxu+veyron_v1_div,veyron_v1_div*8")
+
+(define_insn_reservation "veyron_v1_fpdivsqrt_double" 17
+  (and (eq_attr "tune" "veyron_v1")
+       (and (eq_attr "type" "fdiv,fsqrt")
+	        (eq_attr "mode" "DF")))
+  "veyron_v1_ixu+veyron_v1_fxu+veyron_v1_div,veyron_v1_div*16")
+
+(define_insn_reservation "veyron_v1_popcount" 4
+  (and (eq_attr "tune" "veyron_v1")
+       (eq_attr "type" "cpop"))
+  "veyron_v1_ixu+veyron_v1_multi,veyron_v1_multi*3")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 898a88ce33e..b163be7ec5e 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -28995,14 +28995,15 @@  by particular CPU name.
 Permissible values for this option are: @samp{sifive-e20}, @samp{sifive-e21},
 @samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34}, @samp{sifive-e76},
 @samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54}, @samp{sifive-s76},
-@samp{sifive-u54}, and @samp{sifive-u74}.
+@samp{sifive-u54}, @samp{sifive-u74} and @samp{veyron-v1}.
 
 @opindex mtune
 @item -mtune=@var{processor-string}
 Optimize the output for the given processor, specified by microarchitecture or
 particular CPU name.  Permissible values for this option are: @samp{rocket},
 @samp{sifive-3-series}, @samp{sifive-5-series}, @samp{sifive-7-series},
-@samp{thead-c906}, @samp{size}, and all valid options for @option{-mcpu=}.
+@samp{thead-c906}, @samp{veyron-v1} and @samp{size}, and all valid options for
+@option{-mcpu=}.
 
 When @option{-mtune=} is not specified, use the setting from @option{-mcpu},
 the default is @samp{rocket} if both are not specified.
diff --git a/gcc/testsuite/gcc.target/riscv/divmod-2.c b/gcc/testsuite/gcc.target/riscv/divmod-2.c
index dfd319e52c0..67330f88cf8 100644
--- a/gcc/testsuite/gcc.target/riscv/divmod-2.c
+++ b/gcc/testsuite/gcc.target/riscv/divmod-2.c
@@ -1,7 +1,6 @@ 
 /* { dg-do compile } */
-/* Skip this everywhere for now.  Once we have a target with
-   divmod enabled, only skip for -O0, -O1, -Og, -Oz, -Os.  */
-/* { dg-skip-if "" { *-*-* } { } } */
+/* { dg-options "-mtune=veyron-v1 -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Oz" "-Os" } } */
 
 void
 foo(int a, int b, int *c, int *d)