@@ -127,6 +127,7 @@ static const riscv_implied_info_t riscv_implied_info[] =
{"zvkned", "v"},
{"zvknha", "v"},
{"zvknhb", "v"},
+ {"zvksed", "v"},
{"zfh", "zfhmin"},
{"zfhmin", "f"},
@@ -2288,8 +2288,9 @@ public:
}
};
-/* Implements vaeskf1. */
-class vaeskf1 : public function_base
+/* Implements vaeskf1/vsm4k. */
+template<int UNSPEC>
+class crypto_vi : public function_base
{
public:
bool apply_mask_policy_p () const override { return false; }
@@ -2297,7 +2298,7 @@ public:
rtx expand (function_expander &e) const override
{
- return e.use_exact_insn (code_for_pred_vaeskf1_scalar (e.vector_mode ()));
+ return e.use_exact_insn (code_for_pred_crypto_vi_scalar (UNSPEC, e.vector_mode ()));
}
};
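(Note, not patch content: since "@pred_crypto_vi<vi_ins_name><mode>_scalar" below is driven by the UNSPEC_CRYPTO_VI int iterator, genemit derives one helper keyed on the unspec value, so a single class body serves both immediate-form instructions. A minimal sketch of the dispatch inside expand:

    /* UNSPEC is the template argument: UNSPEC_VAESKF1 selects vaeskf1.vi,
       UNSPEC_VSM4K selects vsm4k.vi, for the same vector mode.  */
    insn_code icode = code_for_pred_crypto_vi_scalar (UNSPEC, e.vector_mode ());
    return e.use_exact_insn (icode);
)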
@@ -2591,11 +2592,13 @@ static CONSTEXPR const crypto_vv<UNSPEC_VAESEM> vaesem_obj;
static CONSTEXPR const crypto_vv<UNSPEC_VAESDF> vaesdf_obj;
static CONSTEXPR const crypto_vv<UNSPEC_VAESDM> vaesdm_obj;
static CONSTEXPR const crypto_vv<UNSPEC_VAESZ> vaesz_obj;
-static CONSTEXPR const vaeskf1 vaeskf1_obj;
+static CONSTEXPR const crypto_vi<UNSPEC_VAESKF1> vaeskf1_obj;
static CONSTEXPR const vaeskf2 vaeskf2_obj;
static CONSTEXPR const vg_nhab<UNSPEC_VSHA2MS> vsha2ms_obj;
static CONSTEXPR const vg_nhab<UNSPEC_VSHA2CH> vsha2ch_obj;
static CONSTEXPR const vg_nhab<UNSPEC_VSHA2CL> vsha2cl_obj;
+static CONSTEXPR const crypto_vi<UNSPEC_VSM4K> vsm4k_obj;
+static CONSTEXPR const crypto_vv<UNSPEC_VSM4R> vsm4r_obj;
/* Declare the function base NAME, pointing it to an instance
of class <NAME>_obj. */
@@ -2882,4 +2885,6 @@ BASE (vaeskf2)
BASE (vsha2ms)
BASE (vsha2ch)
BASE (vsha2cl)
+BASE (vsm4k)
+BASE (vsm4r)
} // end namespace riscv_vector
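(Per the "Declare the function base NAME" comment above, each new BASE line expands to the exported pointer declared in riscv-vector-builtins-bases.h; modulo the enclosing bases namespace, the documented effect is:

    const function_base *const vsm4k = &vsm4k_obj;
    const function_base *const vsm4r = &vsm4r_obj;
)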
@@ -304,6 +304,8 @@ extern const function_base *const vaeskf2;
extern const function_base *const vsha2ms;
extern const function_base *const vsha2ch;
extern const function_base *const vsha2cl;
+extern const function_base *const vsm4k;
+extern const function_base *const vsm4r;
}
} // end namespace riscv_vector
@@ -1050,7 +1050,7 @@ struct crypto_vv_def : public build_base
}
};
-/* vaeskf1/vaeskf2 class. */
+/* vaeskf1/vaeskf2/vsm4k class. */
struct crypto_vi_def : public build_base
{
char *get_name (function_builder &b, const function_instance &instance,
@@ -19,5 +19,6 @@ AVAIL (zvkg, TARGET_ZVKG)
AVAIL (zvkned, TARGET_ZVKNED)
AVAIL (zvknha_or_zvknhb, TARGET_ZVKNHA || TARGET_ZVKNHB)
AVAIL (zvknhb, TARGET_ZVKNHB)
+AVAIL (zvksed, TARGET_ZVKSED)
}
#endif
@@ -64,4 +64,12 @@ DEF_VECTOR_CRYPTO_FUNCTION (vsha2cl, crypto_vv, none_tu_preds, u_vvvv_crypto_se
//ZVKNHB
DEF_VECTOR_CRYPTO_FUNCTION (vsha2ms, crypto_vv, none_tu_preds, u_vvvv_crypto_sew64_ops, zvknhb)
DEF_VECTOR_CRYPTO_FUNCTION (vsha2ch, crypto_vv, none_tu_preds, u_vvvv_crypto_sew64_ops, zvknhb)
-DEF_VECTOR_CRYPTO_FUNCTION (vsha2cl, crypto_vv, none_tu_preds, u_vvvv_crypto_sew64_ops, zvknhb)
\ No newline at end of file
+DEF_VECTOR_CRYPTO_FUNCTION (vsha2cl, crypto_vv, none_tu_preds, u_vvvv_crypto_sew64_ops, zvknhb)
+//ZVKSED
+DEF_VECTOR_CRYPTO_FUNCTION (vsm4k, crypto_vi, none_tu_preds, u_vv_size_crypto_sew32_ops, zvksed)
+DEF_VECTOR_CRYPTO_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvv_crypto_sew32_ops, zvksed)
+DEF_VECTOR_CRYPTO_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_ops, zvksed)
+DEF_VECTOR_CRYPTO_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x2_ops, zvksed)
+DEF_VECTOR_CRYPTO_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x4_ops, zvksed)
+DEF_VECTOR_CRYPTO_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x8_ops, zvksed)
+DEF_VECTOR_CRYPTO_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x16_ops, zvksed)
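(Together with the crypto_vi/crypto_vv shapes, these DEF lines surface the intrinsics exercised by the new tests below; for example, the non-policy u32m1 prototypes, as used in the tests (uimm is size_t, per the vector-crypto intrinsic convention):

    vuint32m1_t __riscv_vsm4k_vi_u32m1 (vuint32m1_t vs2, size_t uimm, size_t vl);
    vuint32m1_t __riscv_vsm4r_vv_u32m1 (vuint32m1_t vd, vuint32m1_t vs2, size_t vl);
    vuint32m8_t __riscv_vsm4r_vs_u32m1_u32m8 (vuint32m8_t vd, vuint32m1_t vs2, size_t vl);
)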
@@ -428,6 +428,7 @@
;; vcompress vector compress instruction
;; vmov whole vector register move
;; vector unknown vector instruction
+;; 17. Crypto Vector instructions
;; vandn crypto vector bitwise and-not instructions
;; vbrev crypto vector reverse bits in elements instructions
;; vbrev8 crypto vector reverse bits in bytes instructions
@@ -451,6 +452,8 @@
;; vsha2ms crypto vector SHA-2 message schedule instructions
;; vsha2ch crypto vector SHA-2 two rounds of compression instructions
;; vsha2cl crypto vector SHA-2 two rounds of compression instructions
+;; vsm4k crypto vector SM4 KeyExpansion instructions
+;; vsm4r crypto vector SM4 Rounds instructions
(define_attr "type"
"unknown,branch,jump,jalr,ret,call,load,fpload,store,fpstore,
mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul,
@@ -472,7 +475,7 @@
vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,
vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll,
vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
- vsha2ms,vsha2ch,vsha2cl"
+ vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r"
(cond [(eq_attr "got" "load") (const_string "load")
;; If a doubleword move uses these expensive instructions,
@@ -33,6 +33,10 @@
UNSPEC_VSHA2MS
UNSPEC_VSHA2CH
UNSPEC_VSHA2CL
+ UNSPEC_VSM4K
+ UNSPEC_VSM4R
+ UNSPEC_VSM4RVV
+ UNSPEC_VSM4RVS
])
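(The placement of the two variant unspecs directly after UNSPEC_VSM4R mirrors the vaes* layout and is assumed load-bearing: the existing crypto_vv expander reaches the vv/vs forms by offset from the base unspec code, i.e.

    /* Assumed invariant, as for the AES unspecs:  */
    /*   UNSPEC_VSM4R + 1 == UNSPEC_VSM4RVV  -> vsm4r.vv  */
    /*   UNSPEC_VSM4R + 2 == UNSPEC_VSM4RVS  -> vsm4r.vs  */
)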
(define_int_attr ror_rol [(UNSPEC_VROL "rol") (UNSPEC_VROR "ror")])
@@ -47,16 +51,20 @@
(UNSPEC_VAESEMVV "aesem") (UNSPEC_VAESDFVV "aesdf")
(UNSPEC_VAESDMVV "aesdm") (UNSPEC_VAESEFVS "aesef")
(UNSPEC_VAESEMVS "aesem") (UNSPEC_VAESDFVS "aesdf")
- (UNSPEC_VAESDMVS "aesdm") (UNSPEC_VAESZVS "aesz" )])
+ (UNSPEC_VAESDMVS "aesdm") (UNSPEC_VAESZVS "aesz" )
+ (UNSPEC_VSM4RVV "sm4r" ) (UNSPEC_VSM4RVS "sm4r" )])
(define_int_attr vv_ins1_name [(UNSPEC_VGHSH "ghsh") (UNSPEC_VSHA2MS "sha2ms")
(UNSPEC_VSHA2CH "sha2ch") (UNSPEC_VSHA2CL "sha2cl")])
+(define_int_attr vi_ins_name [(UNSPEC_VAESKF1 "aeskf1") (UNSPEC_VSM4K "sm4k")])
+
(define_int_attr ins_type [(UNSPEC_VGMUL "vv") (UNSPEC_VAESEFVV "vv")
(UNSPEC_VAESEMVV "vv") (UNSPEC_VAESDFVV "vv")
(UNSPEC_VAESDMVV "vv") (UNSPEC_VAESEFVS "vs")
(UNSPEC_VAESEMVS "vs") (UNSPEC_VAESDFVS "vs")
- (UNSPEC_VAESDMVS "vs") (UNSPEC_VAESZVS "vs")])
+ (UNSPEC_VAESDMVS "vs") (UNSPEC_VAESZVS "vs")
+ (UNSPEC_VSM4RVV "vv") (UNSPEC_VSM4RVS "vs")])
(define_int_iterator UNSPEC_VRORL [UNSPEC_VROL UNSPEC_VROR])
@@ -69,10 +77,12 @@
(define_int_iterator UNSPEC_CRYPTO_VV [UNSPEC_VGMUL UNSPEC_VAESEFVV UNSPEC_VAESEMVV
UNSPEC_VAESDFVV UNSPEC_VAESDMVV UNSPEC_VAESEFVS
UNSPEC_VAESEMVS UNSPEC_VAESDFVS UNSPEC_VAESDMVS
- UNSPEC_VAESZVS])
+ UNSPEC_VAESZVS UNSPEC_VSM4RVV UNSPEC_VSM4RVS])
(define_int_iterator UNSPEC_VGNHAB [UNSPEC_VGHSH UNSPEC_VSHA2MS UNSPEC_VSHA2CH UNSPEC_VSHA2CL])
+(define_int_iterator UNSPEC_CRYPTO_VI [UNSPEC_VAESKF1 UNSPEC_VSM4K])
+
;; zvbb instructions patterns.
;; vandn.vv vandn.vx vrol.vv vrol.vx
;; vror.vv vror.vx vror.vi
@@ -338,7 +348,7 @@
[(match_operand:VSI 1 "register_operand" " 0")
(match_operand:VSI 2 "register_operand" "vr")] UNSPEC_CRYPTO_VV)
(match_dup 1)))]
- "TARGET_ZVKNED"
+ "TARGET_ZVKG || TARGET_ZVKNED"
"v<vv_ins_name>.<ins_type>\t%0,%2"
[(set_attr "type" "v<vv_ins_name>")
(set_attr "mode" "<VSI:MODE>")])
@@ -356,7 +366,7 @@
[(match_operand:VSI 1 "register_operand" " 0")
(match_operand:VSI 2 "register_operand" "vr")] UNSPEC_CRYPTO_VV)
(match_dup 1)))]
- "TARGET_ZVKNED"
+ "TARGET_ZVKNED || TARGET_ZVKSED"
"v<vv_ins_name>.<ins_type>\t%0,%2"
[(set_attr "type" "v<vv_ins_name>")
(set_attr "mode" "<VSI:MODE>")])
@@ -374,7 +384,7 @@
[(match_operand:<VSIX2> 1 "register_operand" " 0")
(match_operand:VLMULX2_SI 2 "register_operand" "vr")] UNSPEC_CRYPTO_VV)
(match_dup 1)))]
- "TARGET_ZVKNED"
+ "TARGET_ZVKNED || TARGET_ZVKSED"
"v<vv_ins_name>.<ins_type>\t%0,%2"
[(set_attr "type" "v<vv_ins_name>")
(set_attr "mode" "<VLMULX2_SI:MODE>")])
@@ -392,7 +402,7 @@
[(match_operand:<VSIX4> 1 "register_operand" " 0")
(match_operand:VLMULX4_SI 2 "register_operand" "vr")] UNSPEC_CRYPTO_VV)
(match_dup 1)))]
- "TARGET_ZVKNED"
+ "TARGET_ZVKNED || TARGET_ZVKSED"
"v<vv_ins_name>.<ins_type>\t%0,%2"
[(set_attr "type" "v<vv_ins_name>")
(set_attr "mode" "<VLMULX4_SI:MODE>")])
@@ -410,7 +420,7 @@
[(match_operand:<VSIX8> 1 "register_operand" " 0")
(match_operand:VLMULX8_SI 2 "register_operand" "vr")] UNSPEC_CRYPTO_VV)
(match_dup 1)))]
- "TARGET_ZVKNED"
+ "TARGET_ZVKNED || TARGET_ZVKSED"
"v<vv_ins_name>.<ins_type>\t%0,%2"
[(set_attr "type" "v<vv_ins_name>")
(set_attr "mode" "<VLMULX8_SI:MODE>")])
@@ -428,13 +438,13 @@
[(match_operand:<VSIX16> 1 "register_operand" " 0")
(match_operand:VLMULX16_SI 2 "register_operand" "vr")] UNSPEC_CRYPTO_VV)
(match_dup 1)))]
- "TARGET_ZVKNED"
+ "TARGET_ZVKNED || TARGET_ZVKSED"
"v<vv_ins_name>.<ins_type>\t%0,%2"
[(set_attr "type" "v<vv_ins_name>")
(set_attr "mode" "<VLMULX16_SI:MODE>")])
-;; vaeskf1.vi
-(define_insn "@pred_vaeskf1<mode>_scalar"
+;; vaeskf1.vi vsm4k.vi
+(define_insn "@pred_crypto_vi<vi_ins_name><mode>_scalar"
[(set (match_operand:VSI 0 "register_operand" "=vd, vd")
(if_then_else:VSI
(unspec:<VM>
@@ -445,11 +455,11 @@
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(unspec:VSI
[(match_operand:VSI 2 "register_operand" "vr, vr")
- (match_operand:<VEL> 3 "const_int_operand" " i, i")] UNSPEC_VAESKF1)
+ (match_operand:<VEL> 3 "const_int_operand" " i, i")] UNSPEC_CRYPTO_VI)
(match_operand:VSI 1 "vector_merge_operand" "vu, 0")))]
- "TARGET_ZVKNED"
- "vaeskf1.vi\t%0,%2,%3"
- [(set_attr "type" "vaeskf1")
+ "TARGET_ZVKNED || TARGET_ZVKSED"
+ "v<vi_ins_name>.vi\t%0,%2,%3"
+ [(set_attr "type" "v<vi_ins_name>")
(set_attr "mode" "<MODE>")])
;; vaeskf2.vi
@@ -54,7 +54,7 @@
vgather,vcompress,vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,\
vssegtux,vssegtox,vlsegdff,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,\
vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,\
- vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl")
+ vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r")
(const_string "true")]
(const_string "false")))
@@ -78,7 +78,7 @@
vgather,vcompress,vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,\
vssegtux,vssegtox,vlsegdff,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,\
vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,\
- vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl")
+ vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r")
(const_string "true")]
(const_string "false")))
@@ -707,7 +707,7 @@
(const_int 2)
(eq_attr "type" "vimerge,vfmerge,vcompress,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,\
- vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl")
+ vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r")
(const_int 1)
(eq_attr "type" "vimuladd,vfmuladd")
@@ -747,7 +747,7 @@
vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
vfncvtftoi,vfncvtftof,vfclass,vimovxv,vfmovfv,vcompress,\
vlsegde,vssegts,vssegtux,vssegtox,vlsegdff,vbrev,vbrev8,vrev8,\
- vghsh,vaeskf1,vaeskf2,vsha2ms,vsha2ch,vsha2cl")
+ vghsh,vaeskf1,vaeskf2,vsha2ms,vsha2ch,vsha2cl,vsm4k")
(const_int 4)
;; If operands[3] of "vlds" is not vector mode, it is pred_broadcast.
@@ -770,7 +770,7 @@
(const_int 6)
(eq_attr "type" "vmpop,vmffs,vmidx,vssegte,vclz,vctz,vgmul,vaesef,vaesem,vaesdf,vaesdm,\
- vaesz")
+ vaesz,vsm4r")
(const_int 3)]
(const_int INVALID_ATTRIBUTE)))
@@ -780,7 +780,7 @@
vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,\
vfncvtitof,vfncvtftoi,vfncvtftof,vfclass,vimovxv,vfmovfv,\
vcompress,vldff,vlsegde,vlsegdff,vbrev,vbrev8,vrev8,vghsh,\
- vaeskf1,vaeskf2,vsha2ms,vsha2ch,vsha2cl")
+ vaeskf1,vaeskf2,vsha2ms,vsha2ch,vsha2cl,vsm4k")
(symbol_ref "riscv_vector::get_ta(operands[5])")
;; If operands[3] of "vlds" is not vector mode, it is pred_broadcast.
@@ -802,7 +802,7 @@
(eq_attr "type" "vimuladd,vfmuladd")
(symbol_ref "riscv_vector::get_ta(operands[7])")
- (eq_attr "type" "vmidx,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaesz")
+ (eq_attr "type" "vmidx,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaesz,vsm4r")
(symbol_ref "riscv_vector::get_ta(operands[4])")]
(const_int INVALID_ATTRIBUTE)))
@@ -844,7 +844,8 @@
vfclass,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,\
vimovxv,vfmovfv,vlsegde,vlsegdff,vbrev,vbrev8,vrev8")
(const_int 7)
- (eq_attr "type" "vldm,vstm,vmalu,vmalu,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaesz")
+ (eq_attr "type" "vldm,vstm,vmalu,vmalu,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaesz,\
+ vsm4r")
(const_int 5)
;; If operands[3] of "vlds" is not vector mode, it is pred_broadcast.
@@ -867,7 +868,8 @@
(eq_attr "type" "vimuladd,vfmuladd")
(const_int 9)
- (eq_attr "type" "vmsfs,vmidx,vcompress,vghsh,vaeskf1,vaeskf2,vsha2ms,vsha2ch,vsha2cl")
+ (eq_attr "type" "vmsfs,vmidx,vcompress,vghsh,vaeskf1,vaeskf2,vsha2ms,vsha2ch,vsha2cl,\
+ vsm4k")
(const_int 6)
(eq_attr "type" "vmpop,vmffs,vssegte,vclz,vctz")
@@ -46,6 +46,7 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/zvknha/*.\[cS\]]] \
"" $DEFAULT_CFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/zvknhb/*.\[cS\]]] \
"" $DEFAULT_CFLAGS
-
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/zvksed/*.\[cS\]]] \
+ "" $DEFAULT_CFLAGS
# All done.
dg-finish
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zvksed -mabi=lp64d -O2 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+/* non-policy */
+vuint32mf2_t test_vsm4k_vi_u32mf2(vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32mf2(vs2, 0, vl);
+}
+
+vuint32m1_t test_vsm4k_vi_u32m1(vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32m1(vs2, 0, vl);
+}
+
+vuint32m2_t test_vsm4k_vi_u32m2(vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32m2(vs2, 0, vl);
+}
+
+vuint32m4_t test_vsm4k_vi_u32m4(vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32m4(vs2, 0, vl);
+}
+
+vuint32m8_t test_vsm4k_vi_u32m8(vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32m8(vs2, 0, vl);
+}
+
+/* policy */
+vuint32mf2_t test_vsm4k_vi_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32mf2_tu(maskedoff, vs2, 0, vl);
+}
+
+vuint32m1_t test_vsm4k_vi_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32m1_tu(maskedoff, vs2, 0, vl);
+}
+
+vuint32m2_t test_vsm4k_vi_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32m2_tu(maskedoff, vs2, 0, vl);
+}
+
+vuint32m4_t test_vsm4k_vi_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32m4_tu(maskedoff, vs2, 0, vl);
+}
+
+vuint32m8_t test_vsm4k_vi_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4k_vi_u32m8_tu(maskedoff, vs2, 0, vl);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 5 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 5 } } */
+/* { dg-final { scan-assembler-times {vsm4k\.vi\s+v[0-9]+,\s*v[0-9]+,0} 10 } } */
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zvksed -mabi=lp64d -O2 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+/* non-policy */
+vuint32mf2_t test_vsm4k_vi_u32mf2(vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4k(vs2, 0, vl);
+}
+
+vuint32m1_t test_vsm4k_vi_u32m1(vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4k(vs2, 0, vl);
+}
+
+vuint32m2_t test_vsm4k_vi_u32m2(vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4k(vs2, 0, vl);
+}
+
+vuint32m4_t test_vsm4k_vi_u32m4(vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4k(vs2, 0, vl);
+}
+
+vuint32m8_t test_vsm4k_vi_u32m8(vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4k(vs2, 0, vl);
+}
+
+/* policy */
+vuint32mf2_t test_vsm4k_vi_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4k_tu(maskedoff, vs2, 0, vl);
+}
+
+vuint32m1_t test_vsm4k_vi_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4k_tu(maskedoff, vs2, 0, vl);
+}
+
+vuint32m2_t test_vsm4k_vi_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4k_tu(maskedoff, vs2, 0, vl);
+}
+
+vuint32m4_t test_vsm4k_vi_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4k_tu(maskedoff, vs2, 0, vl);
+}
+
+vuint32m8_t test_vsm4k_vi_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4k_tu(maskedoff, vs2, 0, vl);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 5 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 5 } } */
+/* { dg-final { scan-assembler-times {vsm4k\.vi\s+v[0-9]+,\s*v[0-9]+,0} 10 } } */
new file mode 100644
@@ -0,0 +1,170 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zvksed -mabi=lp64d -O2 -Wno-psabi" } */
+#include "riscv_vector.h"
+
+/* non-policy */
+vuint32mf2_t test_vsm4r_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32mf2(vd, vs2, vl);
+}
+
+vuint32mf2_t test_vsm4r_vs_u32mf2_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32mf2(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vs_u32mf2_u32m1(vuint32m1_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32m1(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32mf2_u32m2(vuint32m2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32m2(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32mf2_u32m4(vuint32m4_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32m4(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32mf2_u32m8(vuint32m8_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32m8(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32m1(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vs_u32m1_u32m1(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m1_u32m1(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32m1_u32m2(vuint32m2_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m1_u32m2(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m1_u32m4(vuint32m4_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m1_u32m4(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m1_u32m8(vuint32m8_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m1_u32m8(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32m2(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32m2_u32m2(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m2_u32m2(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m2_u32m4(vuint32m4_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m2_u32m4(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m2_u32m8(vuint32m8_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m2_u32m8(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32m4(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m4_u32m4(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m4_u32m4(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m4_u32m8(vuint32m8_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m4_u32m8(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32m8(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m8_u32m8(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m8_u32m8(vd, vs2, vl);
+}
+
+/* policy */
+vuint32mf2_t test_vsm4r_vv_u32mf2_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32mf2_tu(vd, vs2, vl);
+}
+
+vuint32mf2_t test_vsm4r_vs_u32mf2_u32mf2_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32mf2_tu(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vs_u32mf2_u32m1_tu(vuint32m1_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32m1_tu(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32mf2_u32m2_tu(vuint32m2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32m2_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32mf2_u32m4_tu(vuint32m4_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32m4_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32mf2_u32m8_tu(vuint32m8_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32mf2_u32m8_tu(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vv_u32m1_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32m1_tu(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vs_u32m1_u32m1_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m1_u32m1_tu(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32m1_u32m2_tu(vuint32m2_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m1_u32m2_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m1_u32m4_tu(vuint32m4_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m1_u32m4_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m1_u32m8_tu(vuint32m8_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m1_u32m8_tu(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vv_u32m2_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32m2_tu(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32m2_u32m2_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m2_u32m2_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m2_u32m4_tu(vuint32m4_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m2_u32m4_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m2_u32m8_tu(vuint32m8_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m2_u32m8_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vv_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32m4_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m4_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m4_u32m4_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m4_u32m8_tu(vuint32m8_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m4_u32m8_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vv_u32m8_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_u32m8_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m8_u32m8_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_u32m8_u32m8_tu(vd, vs2, vl);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 20 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 20 } } */
+/* { dg-final { scan-assembler-times {vsm4r\.vv\s+v[0-9]+,\s*v[0-9]} 10 } } */
+/* { dg-final { scan-assembler-times {vsm4r\.vs\s+v[0-9]+,\s*v[0-9]} 30 } } */
new file mode 100644
@@ -0,0 +1,170 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zvksed -mabi=lp64d -O2 -Wno-psabi" } */
+#include "riscv_vector.h"
+
+/* non-policy */
+vuint32mf2_t test_vsm4r_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv(vd, vs2, vl);
+}
+
+vuint32mf2_t test_vsm4r_vs_u32mf2_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vs_u32mf2_u32m1(vuint32m1_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32mf2_u32m2(vuint32m2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32mf2_u32m4(vuint32m4_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32mf2_u32m8(vuint32m8_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vs_u32m1_u32m1(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32m1_u32m2(vuint32m2_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m1_u32m4(vuint32m4_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m1_u32m8(vuint32m8_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32m2_u32m2(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m2_u32m4(vuint32m4_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m2_u32m8(vuint32m8_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m4_u32m4(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m4_u32m8(vuint32m8_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m8_u32m8(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs(vd, vs2, vl);
+}
+
+/* policy */
+vuint32mf2_t test_vsm4r_vv_u32mf2_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_tu(vd, vs2, vl);
+}
+
+vuint32mf2_t test_vsm4r_vs_u32mf2_u32mf2_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vs_u32mf2_u32m1_tu(vuint32m1_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32mf2_u32m2_tu(vuint32m2_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32mf2_u32m4_tu(vuint32m4_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32mf2_u32m8_tu(vuint32m8_t vd, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vv_u32m1_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_tu(vd, vs2, vl);
+}
+
+vuint32m1_t test_vsm4r_vs_u32m1_u32m1_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32m1_u32m2_tu(vuint32m2_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m1_u32m4_tu(vuint32m4_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m1_u32m8_tu(vuint32m8_t vd, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vv_u32m2_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_tu(vd, vs2, vl);
+}
+
+vuint32m2_t test_vsm4r_vs_u32m2_u32m2_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m2_u32m4_tu(vuint32m4_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m2_u32m8_tu(vuint32m8_t vd, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vv_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_tu(vd, vs2, vl);
+}
+
+vuint32m4_t test_vsm4r_vs_u32m4_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m4_u32m8_tu(vuint32m8_t vd, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vv_u32m8_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4r_vv_tu(vd, vs2, vl);
+}
+
+vuint32m8_t test_vsm4r_vs_u32m8_u32m8_tu(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vsm4r_vs_tu(vd, vs2, vl);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 20 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 20 } } */
+/* { dg-final { scan-assembler-times {vsm4r\.vv\s+v[0-9]+,\s*v[0-9]} 10 } } */
+/* { dg-final { scan-assembler-times {vsm4r\.vs\s+v[0-9]+,\s*v[0-9]} 30 } } */