new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_tu (v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vand\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_m (mask, v3, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vand\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_tu (v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vand\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_m (mask, v3, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vand\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_tu (v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vand\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_m (mask, v3, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vand\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_tu (v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vand\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_m (mask, v3, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vand\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_tu (v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_m (mask, v3, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_tu (v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_m (mask, v3, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_tu (v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_m (mask, v3, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_tu (v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_m (mask, v3, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_tu (v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_m (mask, v3, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_tu (v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_m (mask, v3, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_tu (v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_m (mask, v3, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vor_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vor_vx_i32m1_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_tu (v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_m (mask, v3, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vor_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vor_vx_i8mf8_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vmul_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmul_vx_i32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vmul_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmul_vx_i32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vmul_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmul_vx_i32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vmul_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmul_vx_i8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vmul_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmul_vx_i8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vmul_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmul_vx_i8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vmul_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmul_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vmul_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmul_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vmul_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmul_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vmul_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmul_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vmul_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmul_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmul\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmul\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vmul_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmul_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vmax_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmax_vx_i32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vmax_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmax_vx_i32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vmax_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmax_vx_i32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vmax_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmax_vx_i8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vmax_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmax_vx_i8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vmax_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmax_vx_i8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tu (v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_m (mask, v3, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tu (v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_m (mask, v3, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vmax_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmax_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vmax_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmax_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vmax_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmax_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vmax_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmax_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vmax_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmax_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vmax_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmax_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vmin_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmin_vx_i32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vmin_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmin_vx_i32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vmin_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vmin_vx_i32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vmin_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmin_vx_i8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vmin_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmin_vx_i8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vmin_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vmin_vx_i8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vmin_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmin_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vmin_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmin_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vmin_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vmin_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vmin_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmin_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vmin_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmin_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vmin_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vmin_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vmaxu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vmaxu_vx_u32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vmaxu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vmaxu_vx_u32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vmaxu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vmaxu_vx_u32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vmaxu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vmaxu_vx_u8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vmaxu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vmaxu_vx_u8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vmaxu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vmaxu_vx_u8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, uint32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vmaxu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vmaxu_vx_u32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vmaxu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vmaxu_vx_u32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vmaxu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vmaxu_vx_u32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, uint8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vmaxu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vmaxu_vx_u8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vmaxu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vmaxu_vx_u8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vmaxu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vmaxu_vx_u8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vminu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vminu_vx_u32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vminu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vminu_vx_u32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vminu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vminu_vx_u32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vminu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vminu_vx_u8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vminu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vminu_vx_u8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vminu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vminu_vx_u8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, uint32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vminu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vminu_vx_u32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vminu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vminu_vx_u32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vminu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vminu_vx_u32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, uint8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vminu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vminu_vx_u8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vminu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vminu_vx_u8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vminu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vminu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vminu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vminu_vx_u8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vdiv_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vdiv_vx_i32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vdiv_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vdiv_vx_i32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vdiv_vx_i32m1 (v2, 5, 4);
+ vint32m1_t v4 = __riscv_vdiv_vx_i32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vdiv_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vdiv_vx_i8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vdiv_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vdiv_vx_i8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vdiv_vx_i8mf8 (v2, 5, 4);
+ vint8mf8_t v4 = __riscv_vdiv_vx_i8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vdiv_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vdiv_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vdiv_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vdiv_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vdiv_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vdiv_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vdiv_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vdiv_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vdiv_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vdiv_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdiv\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdiv\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vdiv_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vdiv_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tu (v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_m (mask, v3, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tu (v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_m (mask, v3, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, uint32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, uint8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, uint32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vdivu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vdivu_vx_u32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, uint8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vdivu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vdivu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vdivu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vdivu_vx_u8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, uint32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, uint8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_tu (v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_m (mask, v3, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, 5, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_tu (v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_m (mask, v3, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, 5, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_tumu (mask, v3, v2, 5, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, uint32_t x)
+{
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tu (v, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_m (mask, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, uint32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vuint32m1_t v = __riscv_vle32_v_u32m1 (in, 4);
+ vuint32m1_t v2 = __riscv_vle32_v_u32m1_tumu (mask, v, in, 4);
+ vuint32m1_t v3 = __riscv_vremu_vx_u32m1 (v2, x, 4);
+ vuint32m1_t v4 = __riscv_vremu_vx_u32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_u32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, uint8_t x)
+{
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tu (v, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_m (mask, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vremu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vremu\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, uint8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vuint8mf8_t v = __riscv_vle8_v_u8mf8 (in, 4);
+ vuint8mf8_t v2 = __riscv_vle8_v_u8mf8_tumu (mask, v, in, 4);
+ vuint8mf8_t v3 = __riscv_vremu_vx_u8mf8 (v2, x, 4);
+ vuint8mf8_t v4 = __riscv_vremu_vx_u8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_u8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, -15, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_tu (v3, v2, -15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, -15, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_m (mask, v3, -15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, -15, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_tumu (mask, v3, v2, -15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, -15, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_tu (v3, v2, -15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, -15, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_m (mask, v3, -15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, -15, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_tumu (mask, v3, v2, -15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_tu (v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_m (mask, v3, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vsub_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vsub_vx_i32m1_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_tu (v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_m (mask, v3, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vsub_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vsub_vx_i8mf8_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tu (v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_m (mask, v3, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tu (v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_m (mask, v3, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 17, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tu (v3, v2, 17, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 17, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_m (mask, v3, 17, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vadd_vx_i32m1 (v2, 17, 4);
+ vint32m1_t v4 = __riscv_vadd_vx_i32m1_tumu (mask, v3, v2, 17, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 17, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tu (v3, v2, 17, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 17, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_m (mask, v3, 17, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vadd_vx_i8mf8 (v2, 17, 4);
+ vint8mf8_t v4 = __riscv_vadd_vx_i8mf8_tumu (mask, v3, v2, 17, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_tu (v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vrsub\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_m (mask, v3, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vrsub\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_tu (v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vrsub\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_m (mask, v3, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vrsub\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_tu (v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vrsub\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_m (mask, v3, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vrsub\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_tu (v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vrsub\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_m (mask, v3, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vrsub\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_tu (v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_m (mask, v3, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vrsub_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vrsub_vx_i32m1_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_tu (v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_m (mask, v3, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vrsub_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vrsub_vx_i8mf8_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f4 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f5 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f6 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 0xAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vadd\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vadd\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vadd\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int32_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
+ vint64m1_t v3 = __riscv_vadd_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vadd_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vadd\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f4 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f5 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f6 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vand\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 0xAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vand\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vand\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vand\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
+ vint64m1_t v3 = __riscv_vand_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vand_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vand\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f4 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f5 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f6 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 0xAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vor\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_tu (v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vxor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_m (mask, v3, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vxor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, -16, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_tu (v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vxor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_m (mask, v3, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vxor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*-16,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, -16, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_tumu (mask, v3, v2, -16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vor\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vor\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
+ vint64m1_t v3 = __riscv_vor_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vor_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vor\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f4 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f5 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f6 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 0xAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
+ vint64m1_t v3 = __riscv_vxor_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vxor_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f4 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f5 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f6 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_tu (v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vxor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_m (mask, v3, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vxor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, 15, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_tu (v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vxor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_m (mask, v3, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vxor\.vi\tv[1-9][0-9]?,\s*v[0-9]+,\s*15,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, 15, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_tumu (mask, v3, v2, 15, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmax\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 0xAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmax\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmax\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmax\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
+ vint64m1_t v3 = __riscv_vmax_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vmax_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vmax\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmin_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vmin_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmin_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vmin_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmin_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vmin_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmin\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmin_vx_i64m1 (v2, 0xAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmin_vx_i64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmin_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmin_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmin\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmin_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vmin_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmin\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vmin_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vmin_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmin\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
+ vint64m1_t v3 = __riscv_vmin_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vmin_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vmin\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,160 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** ...
+** vsetivli\tzero,4,e32,m1,tu,ma
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_tu (v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,ta,ma
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_m (mask, v3, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e32,m1,tu,mu
+** ...
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vxor_vx_i32m1 (v2, 16, 4);
+ vint32m1_t v4 = __riscv_vxor_vx_i32m1_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_tu (v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** ...
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_m (mask, v3, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** ...
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** ...
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vxor\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vxor\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vxor_vx_i8mf8 (v2, 16, 4);
+ vint8mf8_t v4 = __riscv_vxor_vx_i8mf8_tumu (mask, v3, v2, 16, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f0 (void * in, void *out, uint64_t x, int n)
+{
+ vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
+ vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
+ vuint64m1_t v3 = __riscv_vmaxu_vx_u64m1 (v2, -16, 4);
+ vuint64m1_t v4 = __riscv_vmaxu_vx_u64m1 (v3, -16, 4);
+ __riscv_vse64_v_u64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f1 (void * in, void *out, uint64_t x, int n)
+{
+ vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
+ vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
+ vuint64m1_t v3 = __riscv_vmaxu_vx_u64m1 (v2, 15, 4);
+ vuint64m1_t v4 = __riscv_vmaxu_vx_u64m1 (v3, 15, 4);
+ __riscv_vse64_v_u64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, uint64_t x, int n)
+{
+ vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
+ vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
+ vuint64m1_t v3 = __riscv_vmaxu_vx_u64m1 (v2, 16, 4);
+ vuint64m1_t v4 = __riscv_vmaxu_vx_u64m1 (v3, 16, 4);
+ __riscv_vse64_v_u64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vmaxu\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, uint64_t x, int n)
+{
+ vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
+ vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
+ vuint64m1_t v3 = __riscv_vmaxu_vx_u64m1 (v2, 0xAAAAAAA, 4);
+ vuint64m1_t v4 = __riscv_vmaxu_vx_u64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_u64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, uint64_t x, int n)
+{
+ vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
+ vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
+ vuint64m1_t v3 = __riscv_vmaxu_vx_u64m1 (v2, 0xAAAAAAAA, 4);
+ vuint64m1_t v4 = __riscv_vmaxu_vx_u64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_u64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmaxu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, uint64_t x, int n)
+{
+ vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
+ vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
+ vuint64m1_t v3 = __riscv_vmaxu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vuint64m1_t v4 = __riscv_vmaxu_vx_u64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_u64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmaxu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, uint64_t x, int n)
+{
+ vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
+ vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
+ vuint64m1_t v3 = __riscv_vmaxu_vx_u64m1 (v2, x, 4);
+ vuint64m1_t v4 = __riscv_vmaxu_vx_u64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_u64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vmaxu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, uint64_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vuint64m1_t v = __riscv_vle64_v_u64m1 (in + i + 1, 4);
+ vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + i + 2, 4);
+ vuint64m1_t v3 = __riscv_vmaxu_vx_u64m1 (v2, x, 4);
+ vuint64m1_t v4 = __riscv_vmaxu_vx_u64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_u64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vmaxu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, -15, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, -15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 17, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, 17, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f4 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f5 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f6 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, -15, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, -15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vadd\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 17, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, 17, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 0xAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vsub\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vsub\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vsub\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f1:
+** vsetivli\tzero,4,e32,m1,tu,ma
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out, int32_t x)
+{
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_tu (v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f2:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,ta,ma
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_m (mask, v3, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f3:
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e32,m1,tu,mu
+** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)in;
+ asm volatile ("":::"memory");
+ vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
+ vint32m1_t v3 = __riscv_vand_vx_i32m1 (v2, x, 4);
+ vint32m1_t v4 = __riscv_vand_vx_i32m1_tumu (mask, v3, v2, x, 4);
+ __riscv_vse32_v_i32m1 (out, v4, 4);
+}
+
+/*
+** f4:
+** vsetivli\tzero,4,e8,mf8,tu,ma
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f4 (void * in, void *out, int8_t x)
+{
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_tu (v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f5:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f5 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_m (mask, v3, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
+
+/*
+** f6:
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
+** vsetivli\tzero,4,e8,mf8,tu,mu
+** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
+** vand\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vand\.vx\tv[1-9][0-9]?,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void f6 (void * in, void *out, int8_t x)
+{
+ vbool64_t mask = *(vbool64_t*)in;
+ asm volatile ("":::"memory");
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
+ vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in, 4);
+ vint8mf8_t v3 = __riscv_vand_vx_i8mf8 (v2, x, 4);
+ vint8mf8_t v4 = __riscv_vand_vx_i8mf8_tumu (mask, v3, v2, x, 4);
+ __riscv_vse8_v_i8mf8 (out, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int32_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
+ vint64m1_t v3 = __riscv_vsub_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vsub_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vsub\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f4:
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f4 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f5:
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f5 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f6:
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f6 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_vector.h"
+
+/*
+** f0:
+** ...
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*-16
+** ...
+** ret
+*/
+void f0 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, -16, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, -16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f1:
+** ...
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** vrsub\.vi\tv[0-9]+,\s*v[0-9]+,\s*15
+** ...
+** ret
+*/
+void f1 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 15, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, 15, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f2:
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f2 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 16, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, 16, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/*
+** f3:
+** ...
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vrsub\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** ...
+** ret
+*/
+void f3 (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 0xAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1 (v3, 0xAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 0xAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vsub\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vsub\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int64_t x, int n)
+{
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + 2, v4, 4);
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero} 1 } } */
+/* { dg-final { scan-assembler-times {vsub\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */
+#include "riscv_vector.h"
+
+void f (void * in, void *out, int32_t x, int n)
+{
+ for (int i = 0; i < n; i++) {
+ vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
+ vint64m1_t v3 = __riscv_vrsub_vx_i64m1 (v2, x, 4);
+ vint64m1_t v4 = __riscv_vrsub_vx_i64m1_tu (v3, v2, x, 4);
+ __riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlse64\.v\s+v[0-9]+,\s*0\([a-x0-9]+\),\s*zero\s+\.L[0-9]+\:\s+} 1 } } */
+/* { dg-final { scan-assembler-times {vsub\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */