@@ -195,7 +195,8 @@ (define_insn_reservation "generic_ooo_popcount" 2
(define_insn_reservation "generic_ooo_vec_alu" 3
(and (eq_attr "tune" "generic_ooo")
(eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\
- vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector"))
+ vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\
+ vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll"))
"generic_ooo_vxu_issue,generic_ooo_vxu_alu")
;; Vector float comparison, conversion etc.
@@ -209,7 +210,8 @@ (define_insn_reservation "generic_ooo_vec_fcmp" 3
;; Vector integer multiplication.
(define_insn_reservation "generic_ooo_vec_imul" 4
(and (eq_attr "tune" "generic_ooo")
- (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul"))
+ (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\
+ vghsh,vgmul"))
"generic_ooo_vxu_issue,generic_ooo_vxu_alu")
;; Vector float addition.
@@ -230,6 +232,25 @@ (define_insn_reservation "generic_ooo_crypto" 4
(eq_attr "type" "crypto"))
"generic_ooo_vxu_issue,generic_ooo_vxu_alu")
+;; Vector crypto, AES (Zvkned)
+(define_insn_reservation "generic_ooo_crypto_aes" 4
+ (and (eq_attr "tune" "generic_ooo")
+ (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz"))
+ "generic_ooo_vxu_issue,generic_ooo_vxu_alu")
+
+;; Vector crypto, SHA-2 (Zvknh[ab])
+(define_insn_reservation "generic_ooo_crypto_sha" 4
+ (and (eq_attr "tune" "generic_ooo")
+ (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl"))
+ "generic_ooo_vxu_issue,generic_ooo_vxu_alu")
+
+;; Vector crypto, SM3/SM4 (Zvksh/Zvksed)
+(define_insn_reservation "generic_ooo_crypto_sm" 4
+ (and (eq_attr "tune" "generic_ooo")
+ (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c"))
+ "generic_ooo_vxu_issue,generic_ooo_vxu_alu")
+
;; Vector permute.
(define_insn_reservation "generic_ooo_perm" 3
(and (eq_attr "tune" "generic_ooo")
@@ -271,7 +292,7 @@ (define_insn_reservation "generic_ooo_vec_mask" 2
"generic_ooo_vxu_issue,generic_ooo_vxu_alu")
;; Vector vsetvl.
-(define_insn_reservation "generic_ooo_vec_vesetvl" 1
+(define_insn_reservation "generic_ooo_vec_vsetvl" 1
(and (eq_attr "tune" "generic_ooo")
(eq_attr "type" "vsetvl,vsetvl_pre"))
"generic_ooo_vxu_issue")
@@ -25,6 +25,15 @@ (define_cpu_unit "alu" "pipe0")
(define_cpu_unit "imuldiv" "pipe0")
(define_cpu_unit "fdivsqrt" "pipe0")
+;; Separate issue queue for vector instructions.
+(define_cpu_unit "generic_vxu_issue" "pipe0")
+
+;; Vector execution unit.
+(define_cpu_unit "generic_vxu_alu" "pipe0")
+
+;; Vector subunit that does mult/div/sqrt.
+(define_cpu_unit "generic_vxu_multicycle" "pipe0")
+
(define_insn_reservation "generic_alu" 1
(and (eq_attr "tune" "generic")
(eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,\
@@ -102,3 +111,137 @@ (define_insn_reservation "generic_fsqrt" 25
(eq_attr "type" "fsqrt"))
"fdivsqrt*25")
+;; Vector load/store
+(define_insn_reservation "generic_vec_load" 6
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+(define_insn_reservation "generic_vec_store" 6
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector segment loads/stores.
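+;; Assumed slower than the plain vector loads/stores above
+;; (10 vs. 6 cycles).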
+(define_insn_reservation "generic_vec_loadstore_seg" 10
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff,\
+ vssegte,vssegts,vssegtux,vssegtox"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Regular vector operations, integer comparisons and Zvbb/Zvkb
+;; bit manipulation.
+(define_insn_reservation "generic_vec_alu" 3
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\
+ vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\
+ vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector float comparison, conversion etc.
+(define_insn_reservation "generic_vec_fcmp" 3
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\
+ vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
+ vfncvtftoi,vfncvtftof"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector integer multiplication, carry-less multiplication (Zvbc)
+;; and GHASH (Zvkg).
+(define_insn_reservation "generic_vec_imul" 4
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\
+ vghsh,vgmul"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector float addition.
+(define_insn_reservation "generic_vec_fadd" 4
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vfalu,vfwalu"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector float multiplication and FMA.
+(define_insn_reservation "generic_vec_fmul" 6
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector crypto, assumed to be a generic operation for now.
+(define_insn_reservation "generic_crypto" 4
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "crypto"))
+ "generic_vxu_issue,generic_vxu_alu")
+
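+;; As in generic-ooo.md, the vector-crypto reservations below reuse the
+;; 4-cycle latency of the generic crypto reservation; the numbers are
+;; assumptions, not measurements.
+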
+;; Vector crypto, AES (Zvkned)
+(define_insn_reservation "generic_crypto_aes" 4
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector crypto, SHA-2 (Zvknh[ab])
+(define_insn_reservation "generic_crypto_sha" 4
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector crypto, SM3/SM4 (Zvksh/Zvksed)
+(define_insn_reservation "generic_crypto_sm" 4
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector permute.
+(define_insn_reservation "generic_perm" 3
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vimerge,vfmerge,vslideup,vslidedown,vislide1up,\
+ vislide1down,vfslide1up,vfslide1down,vgather,vcompress"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector reduction.
+(define_insn_reservation "generic_vec_reduction" 8
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vired,viwred,vfredu,vfwredu"))
+ "generic_vxu_issue,generic_vxu_multicycle")
+
+;; Vector ordered reduction, assume the latency number is for
+;; a 128-bit vector. It is scaled in riscv_sched_adjust_cost
+;; for larger vectors.
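+;; E.g. a 256-bit vector is assumed to take roughly twice this latency;
+;; the exact factor is computed in the hook, not here.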
+(define_insn_reservation "generic_vec_ordered_reduction" 10
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vfredo,vfwredo"))
+ "generic_vxu_issue,generic_vxu_multicycle*3")
+
+;; Vector integer division, assume not pipelined.
+(define_insn_reservation "generic_vec_idiv" 16
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vidiv"))
+ "generic_vxu_issue,generic_vxu_multicycle*3")
+
+;; Vector float division and sqrt, assume not pipelined.
+(define_insn_reservation "generic_vec_float_divsqrt" 16
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vfdiv,vfsqrt"))
+ "generic_vxu_issue,generic_vxu_multicycle*3")
+
+;; Vector mask operations.
+(define_insn_reservation "generic_vec_mask" 2
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,\
+ vfmovvf,vfmovfv"))
+ "generic_vxu_issue,generic_vxu_alu")
+
+;; Vector vsetvl.
+(define_insn_reservation "generic_vec_vesetvl" 1
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "vsetvl,vsetvl_pre"))
+ "generic_vxu_issue")
+
+;; Vector rounding mode setters, assume pipeline barrier.
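+;; Holding the issue unit for several cycles approximates the barrier;
+;; the latency of 20 is an assumption.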
+(define_insn_reservation "generic_vec_setrm" 20
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "wrvxrm,wrfrm"))
+ "generic_vxu_issue,generic_vxu_issue*3")
+
+;; Vector read vlen/vlenb.
+(define_insn_reservation "generic_vec_readlen" 4
+ (and (eq_attr "tune" "generic")
+ (eq_attr "type" "rdvlenb,rdvl"))
+ "generic_vxu_issue,generic_vxu_issue")
@@ -12,6 +12,15 @@ (define_cpu_unit "sifive_7_B" "sifive_7")
(define_cpu_unit "sifive_7_idiv" "sifive_7")
(define_cpu_unit "sifive_7_fpu" "sifive_7")
+;; Separate issue queue for vector instructions.
+(define_cpu_unit "sifive_7_vxu_issue" "sifive_7")
+
+;; Vector execution unit.
+(define_cpu_unit "sifive_7_vxu_alu" "sifive_7")
+
+;; Vector subunit that does mult/div/sqrt.
+(define_cpu_unit "sifive_7_vxu_multicycle" "sifive_7")
+
(define_insn_reservation "sifive_7_load" 3
(and (eq_attr "tune" "sifive_7")
(eq_attr "type" "load"))
@@ -119,6 +128,141 @@ (define_insn_reservation "sifive_7_popcount" 2
(eq_attr "type" "cpop,clmul"))
"sifive_7_A")
+;; Vector load/store
+(define_insn_reservation "sifive_7_vec_load" 6
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+(define_insn_reservation "sifive_7_vec_store" 6
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector segment loads/stores.
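+;; Assumed slower than the plain vector loads/stores above
+;; (10 vs. 6 cycles).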
+(define_insn_reservation "sifive_7_vec_loadstore_seg" 10
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff,\
+ vssegte,vssegts,vssegtux,vssegtox"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Regular vector operations, integer comparisons and Zvbb/Zvkb
+;; bit manipulation.
+(define_insn_reservation "sifive_7_vec_alu" 3
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\
+ vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\
+ vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector float comparison, conversion etc.
+(define_insn_reservation "sifive_7_vec_fcmp" 3
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\
+ vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
+ vfncvtftoi,vfncvtftof"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector integer multiplication, carry-less multiplication (Zvbc)
+;; and GHASH (Zvkg).
+(define_insn_reservation "sifive_7_vec_imul" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\
+ vghsh,vgmul"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector float addition.
+(define_insn_reservation "sifive_7_vec_fadd" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfalu,vfwalu"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector float multiplication and FMA.
+(define_insn_reservation "sifive_7_vec_fmul" 6
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector crypto, assumed to be a generic operation for now.
+(define_insn_reservation "sifive_7_crypto" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "crypto"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
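+;; As in the generic models, the vector-crypto reservations below reuse
+;; the 4-cycle latency of the generic crypto reservation; the numbers
+;; are assumptions, not measurements.
+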
+;; Vector crypto, AES (Zvkned)
+(define_insn_reservation "sifive_7_crypto_aes" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector crypto, SHA-2 (Zvknh[ab])
+(define_insn_reservation "sifive_7_crypto_sha" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector crypto, SM3/SM4 (Zvksh/Zvksed)
+(define_insn_reservation "sifive_7_crypto_sm" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector permute.
+(define_insn_reservation "sifive_7_perm" 3
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vimerge,vfmerge,vslideup,vslidedown,vislide1up,\
+ vislide1down,vfslide1up,vfslide1down,vgather,vcompress"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector reduction.
+(define_insn_reservation "sifive_7_vec_reduction" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vired,viwred,vfredu,vfwredu"))
+ "sifive_7_vxu_issue,sifive_7_vxu_multicycle")
+
+;; Vector ordered reduction, assume the latency number is for
+;; a 128-bit vector. It is scaled in riscv_sched_adjust_cost
+;; for larger vectors.
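+;; E.g. a 256-bit vector is assumed to take roughly twice this latency;
+;; the exact factor is computed in the hook, not here.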
+(define_insn_reservation "sifive_7_vec_ordered_reduction" 10
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfredo,vfwredo"))
+ "sifive_7_vxu_issue,sifive_7_vxu_multicycle*3")
+
+;; Vector integer division, assume not pipelined.
+(define_insn_reservation "sifive_7_vec_idiv" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vidiv"))
+ "sifive_7_vxu_issue,sifive_7_vxu_multicycle*3")
+
+;; Vector float division and sqrt, assume not pipelined.
+(define_insn_reservation "sifive_7_vec_float_divsqrt" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfdiv,vfsqrt"))
+ "sifive_7_vxu_issue,sifive_7_vxu_multicycle*3")
+
+;; Vector mask operations.
+(define_insn_reservation "sifive_7_vec_mask" 2
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,\
+ vfmovvf,vfmovfv"))
+ "sifive_7_vxu_issue,sifive_7_vxu_alu")
+
+;; Vector vsetvl.
+(define_insn_reservation "sifive_7_vec_vesetvl" 1
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vsetvl,vsetvl_pre"))
+ "sifive_7_vxu_issue")
+
+;; Vector rounding mode setters, assume pipeline barrier.
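+;; Holding the issue unit for several cycles approximates the barrier;
+;; the latency of 20 is an assumption.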
+(define_insn_reservation "sifive_7_vec_setrm" 20
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "wrvxrm,wrfrm"))
+ "sifive_7_vxu_issue,sifive_7_vxu_issue*3")
+
+;; Vector read vlen/vlenb.
+(define_insn_reservation "sifive_7_vec_readlen" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "rdvlenb,rdvl"))
+ "sifive_7_vxu_issue,sifive_7_vxu_issue")
+
(define_bypass 1 "sifive_7_load,sifive_7_alu,sifive_7_mul,sifive_7_f2i,sifive_7_sfb_alu"
"sifive_7_alu,sifive_7_branch")