@@ -64,13 +64,13 @@ ADJUST_ALIGNMENT (VNx16BI, 1);
ADJUST_ALIGNMENT (VNx32BI, 1);
ADJUST_ALIGNMENT (VNx64BI, 1);
-ADJUST_BYTESIZE (VNx1BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx2BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx4BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx8BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
+ADJUST_BYTESIZE (VNx1BI, riscv_v_adjust_bytesize (VNx1BImode, 1));
+ADJUST_BYTESIZE (VNx2BI, riscv_v_adjust_bytesize (VNx2BImode, 1));
+ADJUST_BYTESIZE (VNx4BI, riscv_v_adjust_bytesize (VNx4BImode, 1));
+ADJUST_BYTESIZE (VNx8BI, riscv_v_adjust_bytesize (VNx8BImode, 1));
+ADJUST_BYTESIZE (VNx16BI, riscv_v_adjust_bytesize (VNx16BImode, 2));
+ADJUST_BYTESIZE (VNx32BI, riscv_v_adjust_bytesize (VNx32BImode, 4));
+ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_bytesize (VNx64BImode, 8));
/*
| Mode | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
@@ -1003,6 +1003,27 @@ riscv_v_adjust_nunits (machine_mode mode, int scale)
return scale;
}
+/* Called from ADJUST_BYTESIZE in riscv-modes.def. Return the byte size
+ for MODE, which must be a MODE_VECTOR_BOOL mode; SCALE is the fallback. */
+
+poly_int64
+riscv_v_adjust_bytesize (machine_mode mode, int scale)
+{
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+
+ if (riscv_v_ext_vector_mode_p (mode))
+ {
+ poly_uint16 mode_size = GET_MODE_SIZE (mode);
+ /* Never report more than BYTES_PER_RISCV_VECTOR bytes for MODE. */
+ if (known_lt (mode_size, BYTES_PER_RISCV_VECTOR))
+ return mode_size;
+ else
+ return BYTES_PER_RISCV_VECTOR;
+ }
+ /* riscv_v_ext_vector_mode_p rejected MODE: use SCALE as the byte size. */
+ return scale;
+}
+
/* Return true if X is a valid address for machine mode MODE. If it is,
fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
effect. */
@@ -5807,11 +5828,22 @@ riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
/* Implement TARGET_MODES_TIEABLE_P.
Don't allow floating-point modes to be tied, since type punning of
- single-precision and double-precision is implementation defined. */
+ single-precision and double-precision is implementation defined.
+
+ Don't allow different vbool*_t modes to be tied, since the type
+ size is determined by vl. */
static bool
riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
+ if (riscv_v_ext_vector_mode_p (mode1) && riscv_v_ext_vector_mode_p (mode2))
+ {
+ if (VECTOR_BOOL_MODE_P (mode1) || VECTOR_BOOL_MODE_P (mode2))
+ return mode1 == mode2;
+
+ return known_eq (GET_MODE_SIZE (mode1), GET_MODE_SIZE (mode2));
+ }
+
return (mode1 == mode2
|| !(GET_MODE_CLASS (mode1) == MODE_FLOAT
&& GET_MODE_CLASS (mode2) == MODE_FLOAT));
@@ -1028,6 +1028,8 @@ extern unsigned riscv_stack_boundary;
extern unsigned riscv_bytes_per_vector_chunk;
extern poly_uint16 riscv_vector_chunks;
extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int);
+extern poly_int64 riscv_v_adjust_bytesize (enum machine_mode, int);
+
/* The number of bits and bytes in a RVV vector. */
#define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8))
#define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk))
@@ -242,6 +242,9 @@ extern const unsigned char mode_class[NUM_MACHINE_MODES];
|| CLASS == MODE_ACCUM \
|| CLASS == MODE_UACCUM)
+/* Nonzero if MODE is a vector bool mode. */
+#define VECTOR_BOOL_MODE_P(MODE) (GET_MODE_CLASS(MODE) == MODE_VECTOR_BOOL)
+
/* An optional T (i.e. a T or nothing), where T is some form of mode class. */
template<typename T>
class opt_mode
new file mode 100644
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool1_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+ vbool1_t v1 = *(vbool1_t*)in;
+ vbool2_t v2 = *(vbool2_t*)in;
+
+ *(vbool1_t*)(out + 100) = v1;
+ *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+ vbool1_t v1 = *(vbool1_t*)in;
+ vbool4_t v2 = *(vbool4_t*)in;
+
+ *(vbool1_t*)(out + 100) = v1;
+ *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+ vbool1_t v1 = *(vbool1_t*)in;
+ vbool8_t v2 = *(vbool8_t*)in;
+
+ *(vbool1_t*)(out + 100) = v1;
+ *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+ vbool1_t v1 = *(vbool1_t*)in;
+ vbool16_t v2 = *(vbool16_t*)in;
+
+ *(vbool1_t*)(out + 100) = v1;
+ *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+ vbool1_t v1 = *(vbool1_t*)in;
+ vbool32_t v2 = *(vbool32_t*)in;
+
+ *(vbool1_t*)(out + 100) = v1;
+ *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+ vbool1_t v1 = *(vbool1_t*)in;
+ vbool64_t v2 = *(vbool64_t*)in;
+
+ *(vbool1_t*)(out + 100) = v1;
+ *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
new file mode 100644
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool2_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+ vbool2_t v1 = *(vbool2_t*)in;
+ vbool1_t v2 = *(vbool1_t*)in;
+
+ *(vbool2_t*)(out + 100) = v1;
+ *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+ vbool2_t v1 = *(vbool2_t*)in;
+ vbool4_t v2 = *(vbool4_t*)in;
+
+ *(vbool2_t*)(out + 100) = v1;
+ *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+ vbool2_t v1 = *(vbool2_t*)in;
+ vbool8_t v2 = *(vbool8_t*)in;
+
+ *(vbool2_t*)(out + 100) = v1;
+ *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+ vbool2_t v1 = *(vbool2_t*)in;
+ vbool16_t v2 = *(vbool16_t*)in;
+
+ *(vbool2_t*)(out + 100) = v1;
+ *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+ vbool2_t v1 = *(vbool2_t*)in;
+ vbool32_t v2 = *(vbool32_t*)in;
+
+ *(vbool2_t*)(out + 100) = v1;
+ *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+ vbool2_t v1 = *(vbool2_t*)in;
+ vbool64_t v2 = *(vbool64_t*)in;
+
+ *(vbool2_t*)(out + 100) = v1;
+ *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
new file mode 100644
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool4_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+ vbool4_t v1 = *(vbool4_t*)in;
+ vbool1_t v2 = *(vbool1_t*)in;
+
+ *(vbool4_t*)(out + 100) = v1;
+ *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+ vbool4_t v1 = *(vbool4_t*)in;
+ vbool2_t v2 = *(vbool2_t*)in;
+
+ *(vbool4_t*)(out + 100) = v1;
+ *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+ vbool4_t v1 = *(vbool4_t*)in;
+ vbool8_t v2 = *(vbool8_t*)in;
+
+ *(vbool4_t*)(out + 100) = v1;
+ *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+ vbool4_t v1 = *(vbool4_t*)in;
+ vbool16_t v2 = *(vbool16_t*)in;
+
+ *(vbool4_t*)(out + 100) = v1;
+ *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+ vbool4_t v1 = *(vbool4_t*)in;
+ vbool32_t v2 = *(vbool32_t*)in;
+
+ *(vbool4_t*)(out + 100) = v1;
+ *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+ vbool4_t v1 = *(vbool4_t*)in;
+ vbool64_t v2 = *(vbool64_t*)in;
+
+ *(vbool4_t*)(out + 100) = v1;
+ *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
new file mode 100644
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool8_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+ vbool8_t v1 = *(vbool8_t*)in;
+ vbool1_t v2 = *(vbool1_t*)in;
+
+ *(vbool8_t*)(out + 100) = v1;
+ *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+ vbool8_t v1 = *(vbool8_t*)in;
+ vbool2_t v2 = *(vbool2_t*)in;
+
+ *(vbool8_t*)(out + 100) = v1;
+ *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+ vbool8_t v1 = *(vbool8_t*)in;
+ vbool4_t v2 = *(vbool4_t*)in;
+
+ *(vbool8_t*)(out + 100) = v1;
+ *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+ vbool8_t v1 = *(vbool8_t*)in;
+ vbool16_t v2 = *(vbool16_t*)in;
+
+ *(vbool8_t*)(out + 100) = v1;
+ *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+ vbool8_t v1 = *(vbool8_t*)in;
+ vbool32_t v2 = *(vbool32_t*)in;
+
+ *(vbool8_t*)(out + 100) = v1;
+ *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+ vbool8_t v1 = *(vbool8_t*)in;
+ vbool64_t v2 = *(vbool64_t*)in;
+
+ *(vbool8_t*)(out + 100) = v1;
+ *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
new file mode 100644
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool16_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+ vbool16_t v1 = *(vbool16_t*)in;
+ vbool1_t v2 = *(vbool1_t*)in;
+
+ *(vbool16_t*)(out + 100) = v1;
+ *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+ vbool16_t v1 = *(vbool16_t*)in;
+ vbool2_t v2 = *(vbool2_t*)in;
+
+ *(vbool16_t*)(out + 100) = v1;
+ *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+ vbool16_t v1 = *(vbool16_t*)in;
+ vbool4_t v2 = *(vbool4_t*)in;
+
+ *(vbool16_t*)(out + 100) = v1;
+ *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+ vbool16_t v1 = *(vbool16_t*)in;
+ vbool8_t v2 = *(vbool8_t*)in;
+
+ *(vbool16_t*)(out + 100) = v1;
+ *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+ vbool16_t v1 = *(vbool16_t*)in;
+ vbool32_t v2 = *(vbool32_t*)in;
+
+ *(vbool16_t*)(out + 100) = v1;
+ *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+ vbool16_t v1 = *(vbool16_t*)in;
+ vbool64_t v2 = *(vbool64_t*)in;
+
+ *(vbool16_t*)(out + 100) = v1;
+ *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
new file mode 100644
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool32_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+ vbool32_t v1 = *(vbool32_t*)in;
+ vbool1_t v2 = *(vbool1_t*)in;
+
+ *(vbool32_t*)(out + 100) = v1;
+ *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+ vbool32_t v1 = *(vbool32_t*)in;
+ vbool2_t v2 = *(vbool2_t*)in;
+
+ *(vbool32_t*)(out + 100) = v1;
+ *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+ vbool32_t v1 = *(vbool32_t*)in;
+ vbool4_t v2 = *(vbool4_t*)in;
+
+ *(vbool32_t*)(out + 100) = v1;
+ *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+ vbool32_t v1 = *(vbool32_t*)in;
+ vbool8_t v2 = *(vbool8_t*)in;
+
+ *(vbool32_t*)(out + 100) = v1;
+ *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+ vbool32_t v1 = *(vbool32_t*)in;
+ vbool16_t v2 = *(vbool16_t*)in;
+
+ *(vbool32_t*)(out + 100) = v1;
+ *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+ vbool32_t v1 = *(vbool32_t*)in;
+ vbool64_t v2 = *(vbool64_t*)in;
+
+ *(vbool32_t*)(out + 100) = v1;
+ *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
new file mode 100644
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool64_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+ vbool64_t v1 = *(vbool64_t*)in;
+ vbool1_t v2 = *(vbool1_t*)in;
+
+ *(vbool64_t*)(out + 100) = v1;
+ *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+ vbool64_t v1 = *(vbool64_t*)in;
+ vbool2_t v2 = *(vbool2_t*)in;
+
+ *(vbool64_t*)(out + 100) = v1;
+ *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+ vbool64_t v1 = *(vbool64_t*)in;
+ vbool4_t v2 = *(vbool4_t*)in;
+
+ *(vbool64_t*)(out + 100) = v1;
+ *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+ vbool64_t v1 = *(vbool64_t*)in;
+ vbool8_t v2 = *(vbool8_t*)in;
+
+ *(vbool64_t*)(out + 100) = v1;
+ *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+ vbool64_t v1 = *(vbool64_t*)in;
+ vbool16_t v2 = *(vbool16_t*)in;
+
+ *(vbool64_t*)(out + 100) = v1;
+ *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+ vbool64_t v1 = *(vbool64_t*)in;
+ vbool32_t v2 = *(vbool32_t*)in;
+
+ *(vbool64_t*)(out + 100) = v1;
+ *(vbool32_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
new file mode 100644
@@ -0,0 +1,77 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool1_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+ vbool1_t v1 = *(vbool1_t*)in;
+ vbool1_t v2 = *(vbool1_t*)in;
+
+ *(vbool1_t*)(out + 100) = v1;
+ *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+ vbool2_t v1 = *(vbool2_t*)in;
+ vbool2_t v2 = *(vbool2_t*)in;
+
+ *(vbool2_t*)(out + 100) = v1;
+ *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+ vbool4_t v1 = *(vbool4_t*)in;
+ vbool4_t v2 = *(vbool4_t*)in;
+
+ *(vbool4_t*)(out + 100) = v1;
+ *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+ vbool8_t v1 = *(vbool8_t*)in;
+ vbool8_t v2 = *(vbool8_t*)in;
+
+ *(vbool8_t*)(out + 100) = v1;
+ *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+ vbool16_t v1 = *(vbool16_t*)in;
+ vbool16_t v2 = *(vbool16_t*)in;
+
+ *(vbool16_t*)(out + 100) = v1;
+ *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+ vbool32_t v1 = *(vbool32_t*)in;
+ vbool32_t v2 = *(vbool32_t*)in;
+
+ *(vbool32_t*)(out + 100) = v1;
+ *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+ vbool64_t v1 = *(vbool64_t*)in;
+ vbool64_t v2 = *(vbool64_t*)in;
+
+ *(vbool64_t*)(out + 100) = v1;
+ *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
+#include "target.h"
#include "flags.h"
#include "dojump.h"
#include "explow.h"
@@ -5657,10 +5658,16 @@ visit_reference_op_load (tree lhs, tree op, gimple *stmt)
if (result
&& !useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (op)))
{
+ machine_mode result_mode = TYPE_MODE (TREE_TYPE (result));
+ machine_mode op_mode = TYPE_MODE (TREE_TYPE (op));
+ poly_uint16 result_mode_precision = GET_MODE_PRECISION (result_mode);
+ poly_uint16 op_mode_precision = GET_MODE_PRECISION (op_mode);
+
/* Avoid the type punning in case the result mode has padding where
- the op we lookup has not. */
- if (maybe_lt (GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (result))),
- GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op)))))
+ the op we lookup has not.
+ Avoid the type punning in case the target mode cannot be tied. */
+ if (maybe_lt (result_mode_precision, op_mode_precision)
+ || !targetm.modes_tieable_p (result_mode, op_mode))
result = NULL_TREE;
else
{