RISC-V: Optimize the code gen of VLM/VSM.

Message ID BYAPR04MB4824276DFE4615A5C14150FAA4DF9@BYAPR04MB4824.namprd04.prod.outlook.com
State Accepted
Headers
Series RISC-V: Optimize the code gen of VLM/VSM. |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Li, Pan2 via Gcc-patches Feb. 11, 2023, 2:51 a.m. UTC
  From: Pan Li <incarnation.p.lee@outlook.com>

	PR 108185
	PR 108654

	The bytesize of the vbool*_t isn't well defined. This patch
	adjust the rvv bool modes with actually mode size in bytes.
	However, only allow mode tieable when exactly equal for the
	rvv bool types, aka vbool1_t, vbool2_t, vbool4_t, vbool8_t,
	vbool16_t, vbool32_t, and vbool64_t.

gcc/ChangeLog:

	* config/riscv/riscv-modes.def (ADJUST_BYTESIZE):
	* config/riscv/riscv.cc (riscv_v_adjust_bytesize):
	(riscv_modes_tieable_p):
	* config/riscv/riscv.h (riscv_v_adjust_bytesize):
	* machmode.h (VECTOR_BOOL_MODE_P):
	* tree-ssa-sccvn.cc (visit_reference_op_load):

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/pr108185-1.c: New test.
	* gcc.target/riscv/pr108185-2.c: New test.
	* gcc.target/riscv/pr108185-3.c: New test.
	* gcc.target/riscv/pr108185-4.c: New test.
	* gcc.target/riscv/pr108185-5.c: New test.
	* gcc.target/riscv/pr108185-6.c: New test.
	* gcc.target/riscv/pr108185-7.c: New test.
	* gcc.target/riscv/pr108185-8.c: New test.
---
 gcc/config/riscv/riscv-modes.def            | 14 ++--
 gcc/config/riscv/riscv.cc                   | 34 ++++++++-
 gcc/config/riscv/riscv.h                    |  2 +
 gcc/machmode.h                              |  3 +
 gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +++++++++++++++++++++
 gcc/tree-ssa-sccvn.cc                       | 13 +++-
 13 files changed, 608 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
  

Patch

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index d5305efa8a6..cc21d3c83a2 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -64,13 +64,13 @@  ADJUST_ALIGNMENT (VNx16BI, 1);
 ADJUST_ALIGNMENT (VNx32BI, 1);
 ADJUST_ALIGNMENT (VNx64BI, 1);
 
-ADJUST_BYTESIZE (VNx1BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx2BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx4BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx8BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
+ADJUST_BYTESIZE (VNx1BI, riscv_v_adjust_bytesize (VNx1BImode, 1));
+ADJUST_BYTESIZE (VNx2BI, riscv_v_adjust_bytesize (VNx2BImode, 1));
+ADJUST_BYTESIZE (VNx4BI, riscv_v_adjust_bytesize (VNx4BImode, 1));
+ADJUST_BYTESIZE (VNx8BI, riscv_v_adjust_bytesize (VNx8BImode, 1));
+ADJUST_BYTESIZE (VNx16BI, riscv_v_adjust_bytesize (VNx16BImode, 2));
+ADJUST_BYTESIZE (VNx32BI, riscv_v_adjust_bytesize (VNx32BImode, 4));
+ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_bytesize (VNx64BImode, 8));
 
 /*
    | Mode        | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3b7804b7501..995cdab108f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1003,6 +1003,27 @@  riscv_v_adjust_nunits (machine_mode mode, int scale)
   return scale;
 }
 
+/* Call from ADJUST_BYTESIZE in riscv-modes.def.  Return the correct
+   BYTES size for corresponding machine_mode.  */
+
+poly_int64
+riscv_v_adjust_bytesize (machine_mode mode, int scale)
+{
+  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+
+  if (riscv_v_ext_vector_mode_p (mode))
+    {
+      poly_uint16 mode_size = GET_MODE_SIZE (mode);
+
+      if (known_lt (mode_size, BYTES_PER_RISCV_VECTOR))
+	return mode_size;
+      else
+	return BYTES_PER_RISCV_VECTOR;
+    }
+
+  return scale;
+}
+
 /* Return true if X is a valid address for machine mode MODE.  If it is,
    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
    effect.  */
@@ -5807,11 +5828,22 @@  riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 /* Implement TARGET_MODES_TIEABLE_P.
 
    Don't allow floating-point modes to be tied, since type punning of
-   single-precision and double-precision is implementation defined.  */
+   single-precision and double-precision is implementation defined.
+
+   Don't allow different vbool*_t modes to be tied, since the type
+   size is determinated by vl.  */
 
 static bool
 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
+  if (riscv_v_ext_vector_mode_p (mode1) && riscv_v_ext_vector_mode_p (mode2))
+    {
+      if (VECTOR_BOOL_MODE_P (mode1) || VECTOR_BOOL_MODE_P (mode2))
+	return mode1 == mode2;
+
+	return known_eq (GET_MODE_SIZE (mode1), GET_MODE_SIZE (mode2));
+    }
+
   return (mode1 == mode2
 	  || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
 	       && GET_MODE_CLASS (mode2) == MODE_FLOAT));
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index faffd5a77fe..f857223338c 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -1028,6 +1028,8 @@  extern unsigned riscv_stack_boundary;
 extern unsigned riscv_bytes_per_vector_chunk;
 extern poly_uint16 riscv_vector_chunks;
 extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int);
+extern poly_int64 riscv_v_adjust_bytesize (machine_mode mode, int scale);
+
 /* The number of bits and bytes in a RVV vector.  */
 #define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8))
 #define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk))
diff --git a/gcc/machmode.h b/gcc/machmode.h
index f1865c1ef42..6720472f2c9 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -242,6 +242,9 @@  extern const unsigned char mode_class[NUM_MACHINE_MODES];
    || CLASS == MODE_ACCUM                      \
    || CLASS == MODE_UACCUM)
 
+/* Nonzero if MODE is an vector bool mode.  */
+#define VECTOR_BOOL_MODE_P(MODE) (GET_MODE_CLASS(MODE) == MODE_VECTOR_BOOL)
+
 /* An optional T (i.e. a T or nothing), where T is some form of mode class.  */
 template<typename T>
 class opt_mode
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-1.c b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
new file mode 100644
index 00000000000..c3d0b10271a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool1_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-2.c b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
new file mode 100644
index 00000000000..bd13ba916da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool2_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-3.c b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
new file mode 100644
index 00000000000..99928f7b1cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool4_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-4.c b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
new file mode 100644
index 00000000000..e70284fada8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool8_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-5.c b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
new file mode 100644
index 00000000000..575a7842cdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool16_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-6.c b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
new file mode 100644
index 00000000000..95a11d37016
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool32_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-7.c b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
new file mode 100644
index 00000000000..8f6f0b11f09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool64_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-8.c b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
new file mode 100644
index 00000000000..d96959dd064
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
@@ -0,0 +1,77 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool1_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 028bedbc9a0..19fdba8cfa2 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -43,6 +43,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "tree-eh.h"
 #include "gimplify.h"
+#include "target.h"
 #include "flags.h"
 #include "dojump.h"
 #include "explow.h"
@@ -5657,10 +5658,16 @@  visit_reference_op_load (tree lhs, tree op, gimple *stmt)
   if (result
       && !useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (op)))
     {
+      machine_mode result_mode = TYPE_MODE (TREE_TYPE (result));
+      machine_mode op_mode = TYPE_MODE (TREE_TYPE (op));
+      poly_uint16 result_mode_precision = GET_MODE_PRECISION (result_mode);
+      poly_uint16 op_mode_precision = GET_MODE_PRECISION (op_mode);
+
       /* Avoid the type punning in case the result mode has padding where
-	 the op we lookup has not.  */
-      if (maybe_lt (GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (result))),
-		    GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op)))))
+	 the op we lookup has not.
+	 Avoid the type punning in case the target mode cannot be tied.  */
+      if (maybe_lt (result_mode_precision, op_mode_precision)
+		    || !targetm.modes_tieable_p (result_mode, op_mode))
 	result = NULL_TREE;
       else
 	{