[v2] RISC-V: Bugfix for rvv bool mode precision adjustment

Message ID 20230302055538.730932-1-pan2.li@intel.com
State Accepted
Headers
Series [v2] RISC-V: Bugfix for rvv bool mode precision adjustment |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Li, Pan2 via Gcc-patches March 2, 2023, 5:55 a.m. UTC
  From: Pan Li <pan2.li@intel.com>

	Fix the bug of the rvv bool mode precision with the adjustment.
	The bits size of vbool*_t will be adjusted to
	[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
	adjusted mode precision of vbool*_t will help the underlying passes to
	make the right decision for both the correctness and optimization.

	Given below sample code:
	void test_1(int8_t * restrict in, int8_t * restrict out)
	{
	  vbool8_t v2 = *(vbool8_t*)in;
	  vbool16_t v5 = *(vbool16_t*)in;
	  *(vbool16_t*)(out + 200) = v5;
	  *(vbool8_t*)(out + 100) = v2;
	}

	Before the precision adjustment:
	addi    a4,a1,100
	vsetvli a5,zero,e8,m1,ta,ma
	addi    a1,a1,200
	vlm.v   v24,0(a0)
	vsm.v   v24,0(a4)
	// Need one vsetvli and vlm.v for correctness here.
	vsm.v   v24,0(a1)

	After the precision adjustment:
	csrr    t0,vlenb
	slli    t1,t0,1
	csrr    a3,vlenb
	sub     sp,sp,t1
	slli    a4,a3,1
	add     a4,a4,sp
	sub     a3,a4,a3
	vsetvli a5,zero,e8,m1,ta,ma
	addi    a2,a1,200
	vlm.v   v24,0(a0)
	vsm.v   v24,0(a3)
	addi    a1,a1,100
	vsetvli a4,zero,e8,mf2,ta,ma
	csrr    t0,vlenb
	vlm.v   v25,0(a3)
	vsm.v   v25,0(a2)
	slli    t1,t0,1
	vsetvli a5,zero,e8,m1,ta,ma
	vsm.v   v24,0(a1)
	add     sp,sp,t1
	jr      ra

	However, there may be some optimization opportunities after
	the mode precision adjustment. They can be taken care of in
	the RISC-V backend in separate follow-up PR(s).

	PR 108185
	PR 108654

gcc/ChangeLog:

	* config/riscv/riscv-modes.def (ADJUST_PRECISION):
	* config/riscv/riscv.cc (riscv_v_adjust_precision):
	* config/riscv/riscv.h (riscv_v_adjust_precision):
	* genmodes.cc (ADJUST_PRECISION):
	(emit_mode_adjustments):

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/pr108185-1.c: New test.
	* gcc.target/riscv/pr108185-2.c: New test.
	* gcc.target/riscv/pr108185-3.c: New test.
	* gcc.target/riscv/pr108185-4.c: New test.
	* gcc.target/riscv/pr108185-5.c: New test.
	* gcc.target/riscv/pr108185-6.c: New test.
	* gcc.target/riscv/pr108185-7.c: New test.
	* gcc.target/riscv/pr108185-8.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-modes.def            |  8 +++
 gcc/config/riscv/riscv.cc                   | 12 ++++
 gcc/config/riscv/riscv.h                    |  1 +
 gcc/genmodes.cc                             | 20 +++++-
 gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +++++++++++++++++++++
 12 files changed, 592 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
  

Comments

Richard Sandiford March 2, 2023, 9:43 a.m. UTC | #1
pan2.li@intel.com writes:
> From: Pan Li <pan2.li@intel.com>
>
> 	Fix the bug of the rvv bool mode precision with the adjustment.
> 	The bits size of vbool*_t will be adjusted to
> 	[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
> 	adjusted mode precision of vbool*_t will help underlying pass to
> 	make the right decision for both the correctness and optimization.
>
> 	Given below sample code:
> 	void test_1(int8_t * restrict in, int8_t * restrict out)
> 	{
> 	  vbool8_t v2 = *(vbool8_t*)in;
> 	  vbool16_t v5 = *(vbool16_t*)in;
> 	  *(vbool16_t*)(out + 200) = v5;
> 	  *(vbool8_t*)(out + 100) = v2;
> 	}
>
> 	Before the precision adjustment:
> 	addi    a4,a1,100
> 	vsetvli a5,zero,e8,m1,ta,ma
> 	addi    a1,a1,200
> 	vlm.v   v24,0(a0)
> 	vsm.v   v24,0(a4)
> 	// Need one vsetvli and vlm.v for correctness here.
> 	vsm.v   v24,0(a1)
>
> 	After the precision adjustment:
> 	csrr    t0,vlenb
> 	slli    t1,t0,1
> 	csrr    a3,vlenb
> 	sub     sp,sp,t1
> 	slli    a4,a3,1
> 	add     a4,a4,sp
> 	sub     a3,a4,a3
> 	vsetvli a5,zero,e8,m1,ta,ma
> 	addi    a2,a1,200
> 	vlm.v   v24,0(a0)
> 	vsm.v   v24,0(a3)
> 	addi    a1,a1,100
> 	vsetvli a4,zero,e8,mf2,ta,ma
> 	csrr    t0,vlenb
> 	vlm.v   v25,0(a3)
> 	vsm.v   v25,0(a2)
> 	slli    t1,t0,1
> 	vsetvli a5,zero,e8,m1,ta,ma
> 	vsm.v   v24,0(a1)
> 	add     sp,sp,t1
> 	jr      ra
>
> 	However, there may be some optimization opportunities after
> 	the mode precision adjustment. It can be taken care of in
> 	the RISC-V backend in the underlying separated PR(s).
>
> 	PR 108185
> 	PR 108654
>
> gcc/ChangeLog:
>
> 	* config/riscv/riscv-modes.def (ADJUST_PRECISION):
> 	* config/riscv/riscv.cc (riscv_v_adjust_precision):
> 	* config/riscv/riscv.h (riscv_v_adjust_precision):
> 	* genmodes.cc (ADJUST_PRECISION):
> 	(emit_mode_adjustments):
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/riscv/pr108185-1.c: New test.
> 	* gcc.target/riscv/pr108185-2.c: New test.
> 	* gcc.target/riscv/pr108185-3.c: New test.
> 	* gcc.target/riscv/pr108185-4.c: New test.
> 	* gcc.target/riscv/pr108185-5.c: New test.
> 	* gcc.target/riscv/pr108185-6.c: New test.
> 	* gcc.target/riscv/pr108185-7.c: New test.
> 	* gcc.target/riscv/pr108185-8.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/config/riscv/riscv-modes.def            |  8 +++
>  gcc/config/riscv/riscv.cc                   | 12 ++++
>  gcc/config/riscv/riscv.h                    |  1 +
>  gcc/genmodes.cc                             | 20 +++++-
>  gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++++++++++++++++++
>  gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++++++++++++++++++
>  gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++++++++++++++++++
>  gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++++++++++++++++++
>  gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++++++++++++++++++
>  gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++++++++++++++++++
>  gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++++++++++++++++++
>  gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +++++++++++++++++++++
>  12 files changed, 592 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
>
> diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
> index d5305efa8a6..110bddce851 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
>  ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
>  ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
>  
> +ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1));
> +ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2));
> +ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4));
> +ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8));
> +ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 16));
> +ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision (VNx32BImode, 32));
> +ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64));
> +
>  /*
>     | Mode        | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
>     |             | LMUL        | SEW/LMUL    | LMUL        | SEW/LMUL    |
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index f11b7949a49..ac5c2527fde 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -1003,6 +1003,18 @@ riscv_v_adjust_nunits (machine_mode mode, int scale)
>    return scale;
>  }
>  
> +/* Call from ADJUST_PRECISION in riscv-modes.def.  Return the correct
> +   PRECISION size for corresponding machine_mode.  */
> +
> +poly_int64
> +riscv_v_adjust_precision (machine_mode mode, int scale)
> +{
> +  if (riscv_v_ext_vector_mode_p (mode))
> +    return riscv_vector_chunks * scale;
> +
> +  return scale;
> +}
> +
>  /* Return true if X is a valid address for machine mode MODE.  If it is,
>     fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
>     effect.  */
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 5bc7f2f467d..15b9317a8ce 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -1025,6 +1025,7 @@ extern unsigned riscv_stack_boundary;
>  extern unsigned riscv_bytes_per_vector_chunk;
>  extern poly_uint16 riscv_vector_chunks;
>  extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int);
> +extern poly_int64 riscv_v_adjust_precision (enum machine_mode, int);
>  /* The number of bits and bytes in a RVV vector.  */
>  #define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8))
>  #define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk))
> diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc
> index 2d418f09aab..3452d8fb878 100644
> --- a/gcc/genmodes.cc
> +++ b/gcc/genmodes.cc
> @@ -114,6 +114,7 @@ static struct mode_adjust *adj_alignment;
>  static struct mode_adjust *adj_format;
>  static struct mode_adjust *adj_ibit;
>  static struct mode_adjust *adj_fbit;
> +static struct mode_adjust *adj_precision;
>  
>  /* Mode class operations.  */
>  static enum mode_class
> @@ -819,6 +820,7 @@ make_vector_mode (enum mode_class bclass,
>  #define ADJUST_NUNITS(M, X)    _ADD_ADJUST (nunits, M, X, RANDOM, RANDOM)
>  #define ADJUST_BYTESIZE(M, X)  _ADD_ADJUST (bytesize, M, X, RANDOM, RANDOM)
>  #define ADJUST_ALIGNMENT(M, X) _ADD_ADJUST (alignment, M, X, RANDOM, RANDOM)
> +#define ADJUST_PRECISION(M, X) _ADD_ADJUST (precision, M, X, RANDOM, RANDOM)
>  #define ADJUST_FLOAT_FORMAT(M, X)    _ADD_ADJUST (format, M, X, FLOAT, FLOAT)
>  #define ADJUST_IBIT(M, X)  _ADD_ADJUST (ibit, M, X, ACCUM, UACCUM)
>  #define ADJUST_FBIT(M, X)  _ADD_ADJUST (fbit, M, X, FRACT, UACCUM)
> @@ -1829,8 +1831,9 @@ emit_mode_adjustments (void)
>  	      " (mode_precision[E_%smode], mode_nunits[E_%smode]);\n",
>  	      m->name, m->name);
>        printf ("    mode_precision[E_%smode] = ps * old_factor;\n", m->name);
> -      printf ("    mode_size[E_%smode] = exact_div (mode_precision[E_%smode],"
> -	      " BITS_PER_UNIT);\n", m->name, m->name);
> +      printf ("    if (!multiple_p (mode_precision[E_%smode],"
> +	      " BITS_PER_UNIT, &mode_size[E_%smode]))\n", m->name, m->name);
> +      printf ("      mode_size[E_%smode] = -1;\n", m->name);

Following up from what I said yesterday, I think we need to insert code
to assert that, once all mode adjustments are complete, no mode_size
is still -1.  This would go at the end of emit_mode_adjustments.
I guess for now it could be restricted to the modes in adj_nunits
(if that's simpler).

Thanks,
Richard

>        printf ("    mode_nunits[E_%smode] = ps;\n", m->name);
>        printf ("    adjust_mode_mask (E_%smode);\n", m->name);
>        printf ("  }\n");
> @@ -1963,6 +1966,19 @@ emit_mode_adjustments (void)
>      printf ("\n  /* %s:%d */\n  REAL_MODE_FORMAT (E_%smode) = %s;\n",
>  	    a->file, a->line, a->mode->name, a->adjustment);
>  
> +  /* Adjust precision to the actual bits size.  */
> +  for (a = adj_precision; a; a = a->next)
> +    switch (a->mode->cl)
> +      {
> +	case MODE_VECTOR_BOOL:
> +	  printf ("\n  /* %s:%d.  */\n  ps = %s;\n", a->file, a->line,
> +		  a->adjustment);
> +	  printf ("  mode_precision[E_%smode] = ps;\n", a->mode->name);
> +	  break;
> +	default:
> +	  break;
> +      }
> +
>    puts ("}");
>  }
>  
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-1.c b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
> new file mode 100644
> index 00000000000..e70960c5b6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool1_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 18 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-2.c b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
> new file mode 100644
> index 00000000000..dcc7a644a88
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool2_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 17 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-3.c b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
> new file mode 100644
> index 00000000000..3af0513e006
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool4_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 16 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-4.c b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
> new file mode 100644
> index 00000000000..ea3c360d756
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool8_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-5.c b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
> new file mode 100644
> index 00000000000..9fc659d2402
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool16_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-6.c b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
> new file mode 100644
> index 00000000000..98275e5267d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool32_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 13 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-7.c b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
> new file mode 100644
> index 00000000000..8f6f0b11f09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool64_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-8.c b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
> new file mode 100644
> index 00000000000..d96959dd064
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
> @@ -0,0 +1,77 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool1_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
  
Li, Pan2 via Gcc-patches March 2, 2023, 2:46 p.m. UTC | #2
Oops, it looks like I missed the assertion part. Thank you for the guidance.
I have already added and tested the change below at the end of emit_mode_adjustments, but it looks like we may have other problems with the size, the precision and the C types.

It looks like I need to hold this patch for a while until we reach a conclusion. Please let me know if anything here is mistaken or misleading.

+ 
+  for_all_modes (c, m)
+    printf ("  gcc_checking_assert (!mode_size[E_%smode].is_constant()"
+           " || mode_size[E_%smode].coeffs[0] != -1);\n", m->name, m->name);
+

Thank you and have a nice day!

Pan


-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Thursday, March 2, 2023 5:44 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; rguenther@suse.de
Subject: Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment

pan2.li@intel.com writes:
> From: Pan Li <pan2.li@intel.com>
>
> 	Fix the bug in the rvv bool mode precision by adjusting it.
> 	The bit size of vbool*_t will be adjusted to
> 	[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
> 	adjusted mode precision of vbool*_t will help the underlying passes
> 	make the right decisions for both correctness and optimization.
>
> 	Given below sample code:
> 	void test_1(int8_t * restrict in, int8_t * restrict out)
> 	{
> 	  vbool8_t v2 = *(vbool8_t*)in;
> 	  vbool16_t v5 = *(vbool16_t*)in;
> 	  *(vbool16_t*)(out + 200) = v5;
> 	  *(vbool8_t*)(out + 100) = v2;
> 	}
>
> 	Before the precision adjustment:
> 	addi    a4,a1,100
> 	vsetvli a5,zero,e8,m1,ta,ma
> 	addi    a1,a1,200
> 	vlm.v   v24,0(a0)
> 	vsm.v   v24,0(a4)
> 	// Need one vsetvli and vlm.v for correctness here.
> 	vsm.v   v24,0(a1)
>
> 	After the precision adjustment:
> 	csrr    t0,vlenb
> 	slli    t1,t0,1
> 	csrr    a3,vlenb
> 	sub     sp,sp,t1
> 	slli    a4,a3,1
> 	add     a4,a4,sp
> 	sub     a3,a4,a3
> 	vsetvli a5,zero,e8,m1,ta,ma
> 	addi    a2,a1,200
> 	vlm.v   v24,0(a0)
> 	vsm.v   v24,0(a3)
> 	addi    a1,a1,100
> 	vsetvli a4,zero,e8,mf2,ta,ma
> 	csrr    t0,vlenb
> 	vlm.v   v25,0(a3)
> 	vsm.v   v25,0(a2)
> 	slli    t1,t0,1
> 	vsetvli a5,zero,e8,m1,ta,ma
> 	vsm.v   v24,0(a1)
> 	add     sp,sp,t1
> 	jr      ra
>
> 	However, there may be some optimization opportunities after
> 	the mode precision adjustment. They can be taken care of in
> 	the RISC-V backend in separate follow-up PR(s).
>
> 	PR 108185
> 	PR 108654
>
> gcc/ChangeLog:
>
> 	* config/riscv/riscv-modes.def (ADJUST_PRECISION):
> 	* config/riscv/riscv.cc (riscv_v_adjust_precision):
> 	* config/riscv/riscv.h (riscv_v_adjust_precision):
> 	* genmodes.cc (ADJUST_PRECISION):
> 	(emit_mode_adjustments):
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/riscv/pr108185-1.c: New test.
> 	* gcc.target/riscv/pr108185-2.c: New test.
> 	* gcc.target/riscv/pr108185-3.c: New test.
> 	* gcc.target/riscv/pr108185-4.c: New test.
> 	* gcc.target/riscv/pr108185-5.c: New test.
> 	* gcc.target/riscv/pr108185-6.c: New test.
> 	* gcc.target/riscv/pr108185-7.c: New test.
> 	* gcc.target/riscv/pr108185-8.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/config/riscv/riscv-modes.def            |  8 +++
>  gcc/config/riscv/riscv.cc                   | 12 ++++
>  gcc/config/riscv/riscv.h                    |  1 +
>  gcc/genmodes.cc                             | 20 +++++-
>  gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++++++++++++++++++  
> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++++++++++++++++++  
> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++++++++++++++++++  
> gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++++++++++++++++++  
> gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++++++++++++++++++  
> gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++++++++++++++++++  
> gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++++++++++++++++++  
> gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +++++++++++++++++++++
>  12 files changed, 592 insertions(+), 2 deletions(-)  create mode 
> 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
>
> diff --git a/gcc/config/riscv/riscv-modes.def 
> b/gcc/config/riscv/riscv-modes.def
> index d5305efa8a6..110bddce851 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * 
> riscv_bytes_per_vector_chunk);  ADJUST_BYTESIZE (VNx32BI, 
> riscv_vector_chunks * riscv_bytes_per_vector_chunk);  ADJUST_BYTESIZE 
> (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
>  
> +ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1)); 
> +ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2)); 
> +ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4)); 
> +ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8)); 
> +ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 
> +16)); ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision 
> +(VNx32BImode, 32)); ADJUST_PRECISION (VNx64BI, 
> +riscv_v_adjust_precision (VNx64BImode, 64));
> +
>  /*
>     | Mode        | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
>     |             | LMUL        | SEW/LMUL    | LMUL        | SEW/LMUL    |
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc 
> index f11b7949a49..ac5c2527fde 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -1003,6 +1003,18 @@ riscv_v_adjust_nunits (machine_mode mode, int scale)
>    return scale;
>  }
>  
> +/* Call from ADJUST_PRECISION in riscv-modes.def.  Return the correct
> +   PRECISION size for corresponding machine_mode.  */
> +
> +poly_int64
> +riscv_v_adjust_precision (machine_mode mode, int scale) {
> +  if (riscv_v_ext_vector_mode_p (mode))
> +    return riscv_vector_chunks * scale;
> +
> +  return scale;
> +}
> +
>  /* Return true if X is a valid address for machine mode MODE.  If it is,
>     fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
>     effect.  */
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 
> 5bc7f2f467d..15b9317a8ce 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -1025,6 +1025,7 @@ extern unsigned riscv_stack_boundary;  extern 
> unsigned riscv_bytes_per_vector_chunk;  extern poly_uint16 
> riscv_vector_chunks;  extern poly_int64 riscv_v_adjust_nunits (enum 
> machine_mode, int);
> +extern poly_int64 riscv_v_adjust_precision (enum machine_mode, int);
>  /* The number of bits and bytes in a RVV vector.  */  #define 
> BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * 
> riscv_bytes_per_vector_chunk * 8))  #define BYTES_PER_RISCV_VECTOR 
> (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk)) 
> diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc index 
> 2d418f09aab..3452d8fb878 100644
> --- a/gcc/genmodes.cc
> +++ b/gcc/genmodes.cc
> @@ -114,6 +114,7 @@ static struct mode_adjust *adj_alignment;  static 
> struct mode_adjust *adj_format;  static struct mode_adjust *adj_ibit;  
> static struct mode_adjust *adj_fbit;
> +static struct mode_adjust *adj_precision;
>  
>  /* Mode class operations.  */
>  static enum mode_class
> @@ -819,6 +820,7 @@ make_vector_mode (enum mode_class bclass,
>  #define ADJUST_NUNITS(M, X)    _ADD_ADJUST (nunits, M, X, RANDOM, RANDOM)
>  #define ADJUST_BYTESIZE(M, X)  _ADD_ADJUST (bytesize, M, X, RANDOM, 
> RANDOM)  #define ADJUST_ALIGNMENT(M, X) _ADD_ADJUST (alignment, M, X, 
> RANDOM, RANDOM)
> +#define ADJUST_PRECISION(M, X) _ADD_ADJUST (precision, M, X, RANDOM, 
> +RANDOM)
>  #define ADJUST_FLOAT_FORMAT(M, X)    _ADD_ADJUST (format, M, X, FLOAT, FLOAT)
>  #define ADJUST_IBIT(M, X)  _ADD_ADJUST (ibit, M, X, ACCUM, UACCUM)  
> #define ADJUST_FBIT(M, X)  _ADD_ADJUST (fbit, M, X, FRACT, UACCUM) @@ 
> -1829,8 +1831,9 @@ emit_mode_adjustments (void)
>  	      " (mode_precision[E_%smode], mode_nunits[E_%smode]);\n",
>  	      m->name, m->name);
>        printf ("    mode_precision[E_%smode] = ps * old_factor;\n", m->name);
> -      printf ("    mode_size[E_%smode] = exact_div (mode_precision[E_%smode],"
> -	      " BITS_PER_UNIT);\n", m->name, m->name);
> +      printf ("    if (!multiple_p (mode_precision[E_%smode],"
> +	      " BITS_PER_UNIT, &mode_size[E_%smode]))\n", m->name, m->name);
> +      printf ("      mode_size[E_%smode] = -1;\n", m->name);

Following up from what I said yesterday, I think we need to insert code to assert that, once all mode adjustments are complete, no mode_size is still -1.  This would go at the end of emit_mode_adjustments.
I guess for now it could be restricted to the modes in adj_nunits (if that's simpler).

Thanks,
Richard

>        printf ("    mode_nunits[E_%smode] = ps;\n", m->name);
>        printf ("    adjust_mode_mask (E_%smode);\n", m->name);
>        printf ("  }\n");
> @@ -1963,6 +1966,19 @@ emit_mode_adjustments (void)
>      printf ("\n  /* %s:%d */\n  REAL_MODE_FORMAT (E_%smode) = %s;\n",
>  	    a->file, a->line, a->mode->name, a->adjustment);
>  
> +  /* Adjust precision to the actual bits size.  */
> +  for (a = adj_precision; a; a = a->next)
> +    switch (a->mode->cl)
> +      {
> +	case MODE_VECTOR_BOOL:
> +	  printf ("\n  /* %s:%d.  */\n  ps = %s;\n", a->file, a->line,
> +		  a->adjustment);
> +	  printf ("  mode_precision[E_%smode] = ps;\n", a->mode->name);
> +	  break;
> +	default:
> +	  break;
> +      }
> +
>    puts ("}");
>  }
>  
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-1.c 
> b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
> new file mode 100644
> index 00000000000..e70960c5b6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool1_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool1_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 18 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-2.c 
> b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
> new file mode 100644
> index 00000000000..dcc7a644a88
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool2_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 17 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-3.c 
> b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
> new file mode 100644
> index 00000000000..3af0513e006
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool4_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 16 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-4.c 
> b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
> new file mode 100644
> index 00000000000..ea3c360d756
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool8_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-5.c 
> b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
> new file mode 100644
> index 00000000000..9fc659d2402
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool16_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-6.c 
> b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
> new file mode 100644
> index 00000000000..98275e5267d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool32_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 13 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-7.c 
> b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
> new file mode 100644
> index 00000000000..8f6f0b11f09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool64_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 6 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-8.c 
> b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
> new file mode 100644
> index 00000000000..d96959dd064
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
> @@ -0,0 +1,77 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +test_vbool1_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
> +    vbool1_t v1 = *(vbool1_t*)in;
> +    vbool1_t v2 = *(vbool1_t*)in;
> +
> +    *(vbool1_t*)(out + 100) = v1;
> +    *(vbool1_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool2_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
> +    vbool2_t v1 = *(vbool2_t*)in;
> +    vbool2_t v2 = *(vbool2_t*)in;
> +
> +    *(vbool2_t*)(out + 100) = v1;
> +    *(vbool2_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool4_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
> +    vbool4_t v1 = *(vbool4_t*)in;
> +    vbool4_t v2 = *(vbool4_t*)in;
> +
> +    *(vbool4_t*)(out + 100) = v1;
> +    *(vbool4_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool8_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
> +    vbool8_t v1 = *(vbool8_t*)in;
> +    vbool8_t v2 = *(vbool8_t*)in;
> +
> +    *(vbool8_t*)(out + 100) = v1;
> +    *(vbool8_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool16_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
> +    vbool16_t v1 = *(vbool16_t*)in;
> +    vbool16_t v2 = *(vbool16_t*)in;
> +
> +    *(vbool16_t*)(out + 100) = v1;
> +    *(vbool16_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool32_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
> +    vbool32_t v1 = *(vbool32_t*)in;
> +    vbool32_t v2 = *(vbool32_t*)in;
> +
> +    *(vbool32_t*)(out + 100) = v1;
> +    *(vbool32_t*)(out + 200) = v2;
> +}
> +
> +void
> +test_vbool64_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
> +    vbool64_t v1 = *(vbool64_t*)in;
> +    vbool64_t v2 = *(vbool64_t*)in;
> +
> +    *(vbool64_t*)(out + 100) = v1;
> +    *(vbool64_t*)(out + 200) = v2;
> +}
> +
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
> +/* { dg-final { scan-assembler-times 
> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
> +/* { dg-final { scan-assembler-times 
> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
  
Richard Sandiford March 2, 2023, 5:54 p.m. UTC | #3
"Li, Pan2" <pan2.li@intel.com> writes:
> Oops, it looks like I missed that part about the assertion. Thank you for the coaching.
> I have already added and tested the changes below at the end of emit_mode_adjustments, but it looks like we may have other problems with the size, the precision and the C types.
>
> Looks like I need to hold this PATCH for a while until we reach a conclusion. Feel free to let me know if anything here is mistaken or misleading.
>
> + 
> +  for_all_modes (c, m)
> +    printf ("  gcc_checking_assert (!mode_size[E_%smode].is_constant()"
> +           " || mode_size[E_%smode].coeffs[0] != -1);\n", m->name, m->name);
> +

Using:

  gcc_assert (maybe_ne (mode_size[E_%smode], -1));

would be simpler.  We might as well make it a full assert (rather than a
checking assert) because this code isn't executed very often.

Thanks,
Richard

>
> Thank you and have a nice day!
>
> Pan
>
>
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com> 
> Sent: Thursday, March 2, 2023 5:44 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; rguenther@suse.de
> Subject: Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment
>
> pan2.li@intel.com writes:
>> From: Pan Li <pan2.li@intel.com>
>>
>> 	Fix the bug of the rvv bool mode precision with the adjustment.
>> 	The bits size of vbool*_t will be adjusted to
>> 	[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>> 	adjusted mode precision of vbool*_t will help the underlying passes to
>> 	make the right decision for both correctness and optimization.
>>
>> 	Given below sample code:
>> 	void test_1(int8_t * restrict in, int8_t * restrict out)
>> 	{
>> 	  vbool8_t v2 = *(vbool8_t*)in;
>> 	  vbool16_t v5 = *(vbool16_t*)in;
>> 	  *(vbool16_t*)(out + 200) = v5;
>> 	  *(vbool8_t*)(out + 100) = v2;
>> 	}
>>
>> 	Before the precision adjustment:
>> 	addi    a4,a1,100
>> 	vsetvli a5,zero,e8,m1,ta,ma
>> 	addi    a1,a1,200
>> 	vlm.v   v24,0(a0)
>> 	vsm.v   v24,0(a4)
>> 	// Need one vsetvli and vlm.v for correctness here.
>> 	vsm.v   v24,0(a1)
>>
>> 	After the precision adjustment:
>> 	csrr    t0,vlenb
>> 	slli    t1,t0,1
>> 	csrr    a3,vlenb
>> 	sub     sp,sp,t1
>> 	slli    a4,a3,1
>> 	add     a4,a4,sp
>> 	sub     a3,a4,a3
>> 	vsetvli a5,zero,e8,m1,ta,ma
>> 	addi    a2,a1,200
>> 	vlm.v   v24,0(a0)
>> 	vsm.v   v24,0(a3)
>> 	addi    a1,a1,100
>> 	vsetvli a4,zero,e8,mf2,ta,ma
>> 	csrr    t0,vlenb
>> 	vlm.v   v25,0(a3)
>> 	vsm.v   v25,0(a2)
>> 	slli    t1,t0,1
>> 	vsetvli a5,zero,e8,m1,ta,ma
>> 	vsm.v   v24,0(a1)
>> 	add     sp,sp,t1
>> 	jr      ra
>>
>> 	However, there may be some optimization opportunities after
>> 	the mode precision adjustment. They can be taken care of in
>> 	the RISC-V backend in separate follow-up PR(s).
>>
>> 	PR 108185
>> 	PR 108654
>>
>> gcc/ChangeLog:
>>
>> 	* config/riscv/riscv-modes.def (ADJUST_PRECISION):
>> 	* config/riscv/riscv.cc (riscv_v_adjust_precision):
>> 	* config/riscv/riscv.h (riscv_v_adjust_precision):
>> 	* genmodes.cc (ADJUST_PRECISION):
>> 	(emit_mode_adjustments):
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* gcc.target/riscv/pr108185-1.c: New test.
>> 	* gcc.target/riscv/pr108185-2.c: New test.
>> 	* gcc.target/riscv/pr108185-3.c: New test.
>> 	* gcc.target/riscv/pr108185-4.c: New test.
>> 	* gcc.target/riscv/pr108185-5.c: New test.
>> 	* gcc.target/riscv/pr108185-6.c: New test.
>> 	* gcc.target/riscv/pr108185-7.c: New test.
>> 	* gcc.target/riscv/pr108185-8.c: New test.
>>
>> Signed-off-by: Pan Li <pan2.li@intel.com>
>> ---
>>  gcc/config/riscv/riscv-modes.def            |  8 +++
>>  gcc/config/riscv/riscv.cc                   | 12 ++++
>>  gcc/config/riscv/riscv.h                    |  1 +
>>  gcc/genmodes.cc                             | 20 +++++-
>>  gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++++++++++++++++++  
>> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++++++++++++++++++  
>> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++++++++++++++++++  
>> gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++++++++++++++++++  
>> gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++++++++++++++++++  
>> gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++++++++++++++++++  
>> gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++++++++++++++++++  
>> gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +++++++++++++++++++++
>>  12 files changed, 592 insertions(+), 2 deletions(-)  create mode 
>> 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
>>
>> diff --git a/gcc/config/riscv/riscv-modes.def 
>> b/gcc/config/riscv/riscv-modes.def
>> index d5305efa8a6..110bddce851 100644
>> --- a/gcc/config/riscv/riscv-modes.def
>> +++ b/gcc/config/riscv/riscv-modes.def
>> @@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * 
>> riscv_bytes_per_vector_chunk);  ADJUST_BYTESIZE (VNx32BI, 
>> riscv_vector_chunks * riscv_bytes_per_vector_chunk);  ADJUST_BYTESIZE 
>> (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
>>  
>> +ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1)); 
>> +ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2)); 
>> +ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4)); 
>> +ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8)); 
>> +ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 
>> +16)); ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision 
>> +(VNx32BImode, 32)); ADJUST_PRECISION (VNx64BI, 
>> +riscv_v_adjust_precision (VNx64BImode, 64));
>> +
>>  /*
>>     | Mode        | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
>>     |             | LMUL        | SEW/LMUL    | LMUL        | SEW/LMUL    |
>> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc 
>> index f11b7949a49..ac5c2527fde 100644
>> --- a/gcc/config/riscv/riscv.cc
>> +++ b/gcc/config/riscv/riscv.cc
>> @@ -1003,6 +1003,18 @@ riscv_v_adjust_nunits (machine_mode mode, int scale)
>>    return scale;
>>  }
>>  
>> +/* Call from ADJUST_PRECISION in riscv-modes.def.  Return the correct
>> +   PRECISION size for corresponding machine_mode.  */
>> +
>> +poly_int64
>> +riscv_v_adjust_precision (machine_mode mode, int scale) {
>> +  if (riscv_v_ext_vector_mode_p (mode))
>> +    return riscv_vector_chunks * scale;
>> +
>> +  return scale;
>> +}
>> +
>>  /* Return true if X is a valid address for machine mode MODE.  If it is,
>>     fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
>>     effect.  */
>> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 
>> 5bc7f2f467d..15b9317a8ce 100644
>> --- a/gcc/config/riscv/riscv.h
>> +++ b/gcc/config/riscv/riscv.h
>> @@ -1025,6 +1025,7 @@ extern unsigned riscv_stack_boundary;  extern 
>> unsigned riscv_bytes_per_vector_chunk;  extern poly_uint16 
>> riscv_vector_chunks;  extern poly_int64 riscv_v_adjust_nunits (enum 
>> machine_mode, int);
>> +extern poly_int64 riscv_v_adjust_precision (enum machine_mode, int);
>>  /* The number of bits and bytes in a RVV vector.  */  #define 
>> BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * 
>> riscv_bytes_per_vector_chunk * 8))  #define BYTES_PER_RISCV_VECTOR 
>> (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk)) 
>> diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc index 
>> 2d418f09aab..3452d8fb878 100644
>> --- a/gcc/genmodes.cc
>> +++ b/gcc/genmodes.cc
>> @@ -114,6 +114,7 @@ static struct mode_adjust *adj_alignment;  static 
>> struct mode_adjust *adj_format;  static struct mode_adjust *adj_ibit;  
>> static struct mode_adjust *adj_fbit;
>> +static struct mode_adjust *adj_precision;
>>  
>>  /* Mode class operations.  */
>>  static enum mode_class
>> @@ -819,6 +820,7 @@ make_vector_mode (enum mode_class bclass,
>>  #define ADJUST_NUNITS(M, X)    _ADD_ADJUST (nunits, M, X, RANDOM, RANDOM)
>>  #define ADJUST_BYTESIZE(M, X)  _ADD_ADJUST (bytesize, M, X, RANDOM, 
>> RANDOM)  #define ADJUST_ALIGNMENT(M, X) _ADD_ADJUST (alignment, M, X, 
>> RANDOM, RANDOM)
>> +#define ADJUST_PRECISION(M, X) _ADD_ADJUST (precision, M, X, RANDOM, 
>> +RANDOM)
>>  #define ADJUST_FLOAT_FORMAT(M, X)    _ADD_ADJUST (format, M, X, FLOAT, FLOAT)
>>  #define ADJUST_IBIT(M, X)  _ADD_ADJUST (ibit, M, X, ACCUM, UACCUM)  
>> #define ADJUST_FBIT(M, X)  _ADD_ADJUST (fbit, M, X, FRACT, UACCUM) @@ 
>> -1829,8 +1831,9 @@ emit_mode_adjustments (void)
>>  	      " (mode_precision[E_%smode], mode_nunits[E_%smode]);\n",
>>  	      m->name, m->name);
>>        printf ("    mode_precision[E_%smode] = ps * old_factor;\n", m->name);
>> -      printf ("    mode_size[E_%smode] = exact_div (mode_precision[E_%smode],"
>> -	      " BITS_PER_UNIT);\n", m->name, m->name);
>> +      printf ("    if (!multiple_p (mode_precision[E_%smode],"
>> +	      " BITS_PER_UNIT, &mode_size[E_%smode]))\n", m->name, m->name);
>> +      printf ("      mode_size[E_%smode] = -1;\n", m->name);
>
> Following up from what I said yesterday, I think we need to insert code to assert that, once all mode adjustments are complete, no mode_size is still -1.  This would go at the end of emit_mode_adjustments.
> I guess for now it could be restricted to the modes in adj_nunits (if that's simpler).
>
> Thanks,
> Richard
>
>>        printf ("    mode_nunits[E_%smode] = ps;\n", m->name);
>>        printf ("    adjust_mode_mask (E_%smode);\n", m->name);
>>        printf ("  }\n");
>> @@ -1963,6 +1966,19 @@ emit_mode_adjustments (void)
>>      printf ("\n  /* %s:%d */\n  REAL_MODE_FORMAT (E_%smode) = %s;\n",
>>  	    a->file, a->line, a->mode->name, a->adjustment);
>>  
>> +  /* Adjust precision to the actual bits size.  */
>> +  for (a = adj_precision; a; a = a->next)
>> +    switch (a->mode->cl)
>> +      {
>> +	case MODE_VECTOR_BOOL:
>> +	  printf ("\n  /* %s:%d.  */\n  ps = %s;\n", a->file, a->line,
>> +		  a->adjustment);
>> +	  printf ("  mode_precision[E_%smode] = ps;\n", a->mode->name);
>> +	  break;
>> +	default:
>> +	  break;
>> +      }
>> +
>>    puts ("}");
>>  }
>>  
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-1.c 
>> b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
>> new file mode 100644
>> index 00000000000..e70960c5b6d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool1_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 18 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-2.c 
>> b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
>> new file mode 100644
>> index 00000000000..dcc7a644a88
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool2_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 17 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-3.c 
>> b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
>> new file mode 100644
>> index 00000000000..3af0513e006
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool4_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 16 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-4.c 
>> b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
>> new file mode 100644
>> index 00000000000..ea3c360d756
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool8_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-5.c 
>> b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
>> new file mode 100644
>> index 00000000000..9fc659d2402
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool16_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-6.c 
>> b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
>> new file mode 100644
>> index 00000000000..98275e5267d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool32_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 13 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-7.c 
>> b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
>> new file mode 100644
>> index 00000000000..8f6f0b11f09
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool64_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-8.c 
>> b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
>> new file mode 100644
>> index 00000000000..d96959dd064
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
>> @@ -0,0 +1,77 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool1_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
  
Li, Pan2 via Gcc-patches March 3, 2023, 2:34 a.m. UTC | #4
Got it. Thank you, I very much appreciate your help and patience. I have updated the patch at the link below.

https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613257.html

Pan

-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Friday, March 3, 2023 1:55 AM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; rguenther@suse.de
Subject: Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment

"Li, Pan2" <pan2.li@intel.com> writes:
> Oops, it looks like I missed that part about the assertion. Thank you for the coaching.
> I have added and tested the changes below at the end of emit_mode_adjustments, but it looks like we may have other problems with the size, the precision and the C types.
>
> Looks like I need to hold this PATCH for a while until we reach a conclusion. Feel free to let me know if anything here is mistaken or misleading.
>
> + 
> +  for_all_modes (c, m)
> +    printf ("  gcc_checking_assert (!mode_size[E_%smode].is_constant()"
> +           " || mode_size[E_%smode].coeffs[0] != -1);\n", m->name, m->name);
> +

Using:

  gcc_assert (maybe_ne (mode_size[E_%smode], -1));

would be simpler.  We might as well make it a full assert (rather than a checking assert) because this code isn't executed very often.

Thanks,
Richard

>
> Thank you and have a nice day!
>
> Pan
>
>
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Thursday, March 2, 2023 5:44 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; 
> kito.cheng@sifive.com; rguenther@suse.de
> Subject: Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision 
> adjustment
>
> pan2.li@intel.com writes:
>> From: Pan Li <pan2.li@intel.com>
>>
>> 	Fix the bug of the rvv bool mode precision with the adjustment.
>> 	The bit size of vbool*_t will be adjusted to
>> 	[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>> 	adjusted mode precision of vbool*_t will help the underlying passes
>> 	make the right decisions for both correctness and optimization.
>>
>> 	Given below sample code:
>> 	void test_1(int8_t * restrict in, int8_t * restrict out)
>> 	{
>> 	  vbool8_t v2 = *(vbool8_t*)in;
>> 	  vbool16_t v5 = *(vbool16_t*)in;
>> 	  *(vbool16_t*)(out + 200) = v5;
>> 	  *(vbool8_t*)(out + 100) = v2;
>> 	}
>>
>> 	Before the precision adjustment:
>> 	addi    a4,a1,100
>> 	vsetvli a5,zero,e8,m1,ta,ma
>> 	addi    a1,a1,200
>> 	vlm.v   v24,0(a0)
>> 	vsm.v   v24,0(a4)
>> 	// Need one vsetvli and vlm.v for correctness here.
>> 	vsm.v   v24,0(a1)
>>
>> 	After the precision adjustment:
>> 	csrr    t0,vlenb
>> 	slli    t1,t0,1
>> 	csrr    a3,vlenb
>> 	sub     sp,sp,t1
>> 	slli    a4,a3,1
>> 	add     a4,a4,sp
>> 	sub     a3,a4,a3
>> 	vsetvli a5,zero,e8,m1,ta,ma
>> 	addi    a2,a1,200
>> 	vlm.v   v24,0(a0)
>> 	vsm.v   v24,0(a3)
>> 	addi    a1,a1,100
>> 	vsetvli a4,zero,e8,mf2,ta,ma
>> 	csrr    t0,vlenb
>> 	vlm.v   v25,0(a3)
>> 	vsm.v   v25,0(a2)
>> 	slli    t1,t0,1
>> 	vsetvli a5,zero,e8,m1,ta,ma
>> 	vsm.v   v24,0(a1)
>> 	add     sp,sp,t1
>> 	jr      ra
>>
>> 	However, there may be some optimization opportunities after
>> 	the mode precision adjustment. They can be taken care of in
>> 	the RISC-V backend in separate follow-up PR(s).
>>
>> 	PR 108185
>> 	PR 108654
>>
>> gcc/ChangeLog:
>>
>> 	* config/riscv/riscv-modes.def (ADJUST_PRECISION):
>> 	* config/riscv/riscv.cc (riscv_v_adjust_precision):
>> 	* config/riscv/riscv.h (riscv_v_adjust_precision):
>> 	* genmodes.cc (ADJUST_PRECISION):
>> 	(emit_mode_adjustments):
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* gcc.target/riscv/pr108185-1.c: New test.
>> 	* gcc.target/riscv/pr108185-2.c: New test.
>> 	* gcc.target/riscv/pr108185-3.c: New test.
>> 	* gcc.target/riscv/pr108185-4.c: New test.
>> 	* gcc.target/riscv/pr108185-5.c: New test.
>> 	* gcc.target/riscv/pr108185-6.c: New test.
>> 	* gcc.target/riscv/pr108185-7.c: New test.
>> 	* gcc.target/riscv/pr108185-8.c: New test.
>>
>> Signed-off-by: Pan Li <pan2.li@intel.com>
>> ---
>>  gcc/config/riscv/riscv-modes.def            |  8 +++
>>  gcc/config/riscv/riscv.cc                   | 12 ++++
>>  gcc/config/riscv/riscv.h                    |  1 +
>>  gcc/genmodes.cc                             | 20 +++++-
>>  gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++++++++++++++++++
>>  gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++++++++++++++++++
>>  gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++++++++++++++++++
>>  gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++++++++++++++++++
>>  gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++++++++++++++++++
>>  gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++++++++++++++++++
>>  gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++++++++++++++++++
>>  gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +++++++++++++++++++++
>>  12 files changed, 592 insertions(+), 2 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
>>
>> diff --git a/gcc/config/riscv/riscv-modes.def
>> b/gcc/config/riscv/riscv-modes.def
>> index d5305efa8a6..110bddce851 100644
>> --- a/gcc/config/riscv/riscv-modes.def
>> +++ b/gcc/config/riscv/riscv-modes.def
>> @@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
>>  ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
>>  ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
>>  
>> +ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1)); 
>> +ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2)); 
>> +ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4)); 
>> +ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8)); 
>> +ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 16));
>> +ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision (VNx32BImode, 32));
>> +ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64));
>> +
>>  /*
>>     | Mode        | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
>>     |             | LMUL        | SEW/LMUL    | LMUL        | SEW/LMUL    |
>> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc 
>> index f11b7949a49..ac5c2527fde 100644
>> --- a/gcc/config/riscv/riscv.cc
>> +++ b/gcc/config/riscv/riscv.cc
>> @@ -1003,6 +1003,18 @@ riscv_v_adjust_nunits (machine_mode mode, int scale)
>>    return scale;
>>  }
>>  
>> +/* Call from ADJUST_PRECISION in riscv-modes.def.  Return the correct
>> +   PRECISION size for corresponding machine_mode.  */
>> +
>> +poly_int64
>> +riscv_v_adjust_precision (machine_mode mode, int scale) {
>> +  if (riscv_v_ext_vector_mode_p (mode))
>> +    return riscv_vector_chunks * scale;
>> +
>> +  return scale;
>> +}
>> +
>>  /* Return true if X is a valid address for machine mode MODE.  If it is,
>>     fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
>>     effect.  */
>> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h 
>> index 5bc7f2f467d..15b9317a8ce 100644
>> --- a/gcc/config/riscv/riscv.h
>> +++ b/gcc/config/riscv/riscv.h
>> @@ -1025,6 +1025,7 @@ extern unsigned riscv_stack_boundary;
>>  extern unsigned riscv_bytes_per_vector_chunk;
>>  extern poly_uint16 riscv_vector_chunks;
>>  extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int);
>> +extern poly_int64 riscv_v_adjust_precision (enum machine_mode, int);
>>  /* The number of bits and bytes in a RVV vector.  */
>>  #define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8))
>>  #define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk))
>> diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc
>> index 2d418f09aab..3452d8fb878 100644
>> --- a/gcc/genmodes.cc
>> +++ b/gcc/genmodes.cc
>> @@ -114,6 +114,7 @@ static struct mode_adjust *adj_alignment;
>>  static struct mode_adjust *adj_format;
>>  static struct mode_adjust *adj_ibit;
>>  static struct mode_adjust *adj_fbit;
>> +static struct mode_adjust *adj_precision;
>>  
>>  /* Mode class operations.  */
>>  static enum mode_class
>> @@ -819,6 +820,7 @@ make_vector_mode (enum mode_class bclass,
>>  #define ADJUST_NUNITS(M, X)    _ADD_ADJUST (nunits, M, X, RANDOM, RANDOM)
>>  #define ADJUST_BYTESIZE(M, X)  _ADD_ADJUST (bytesize, M, X, RANDOM, RANDOM)
>>  #define ADJUST_ALIGNMENT(M, X) _ADD_ADJUST (alignment, M, X, RANDOM, RANDOM)
>> +#define ADJUST_PRECISION(M, X) _ADD_ADJUST (precision, M, X, RANDOM, RANDOM)
>>  #define ADJUST_FLOAT_FORMAT(M, X)    _ADD_ADJUST (format, M, X, FLOAT, FLOAT)
>>  #define ADJUST_IBIT(M, X)  _ADD_ADJUST (ibit, M, X, ACCUM, UACCUM)
>>  #define ADJUST_FBIT(M, X)  _ADD_ADJUST (fbit, M, X, FRACT, UACCUM)
>> @@ -1829,8 +1831,9 @@ emit_mode_adjustments (void)
>>  	      " (mode_precision[E_%smode], mode_nunits[E_%smode]);\n",
>>  	      m->name, m->name);
>>        printf ("    mode_precision[E_%smode] = ps * old_factor;\n", m->name);
>> -      printf ("    mode_size[E_%smode] = exact_div (mode_precision[E_%smode],"
>> -	      " BITS_PER_UNIT);\n", m->name, m->name);
>> +      printf ("    if (!multiple_p (mode_precision[E_%smode],"
>> +	      " BITS_PER_UNIT, &mode_size[E_%smode]))\n", m->name, m->name);
>> +      printf ("      mode_size[E_%smode] = -1;\n", m->name);
>
> Following up from what I said yesterday, I think we need to insert code to assert that, once all mode adjustments are complete, no mode_size is still -1.  This would go at the end of emit_mode_adjustments.
> I guess for now it could be restricted to the modes in adj_nunits (if that's simpler).
>
> Thanks,
> Richard
>
>>        printf ("    mode_nunits[E_%smode] = ps;\n", m->name);
>>        printf ("    adjust_mode_mask (E_%smode);\n", m->name);
>>        printf ("  }\n");
>> @@ -1963,6 +1966,19 @@ emit_mode_adjustments (void)
>>      printf ("\n  /* %s:%d */\n  REAL_MODE_FORMAT (E_%smode) = %s;\n",
>>  	    a->file, a->line, a->mode->name, a->adjustment);
>>  
>> +  /* Adjust precision to the actual bits size.  */
>> +  for (a = adj_precision; a; a = a->next)
>> +    switch (a->mode->cl)
>> +      {
>> +	case MODE_VECTOR_BOOL:
>> +	  printf ("\n  /* %s:%d.  */\n  ps = %s;\n", a->file, a->line,
>> +		  a->adjustment);
>> +	  printf ("  mode_precision[E_%smode] = ps;\n", a->mode->name);
>> +	  break;
>> +	default:
>> +	  break;
>> +      }
>> +
>>    puts ("}");
>>  }
>>  
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-1.c
>> b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
>> new file mode 100644
>> index 00000000000..e70960c5b6d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool1_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool1_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 18 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-2.c
>> b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
>> new file mode 100644
>> index 00000000000..dcc7a644a88
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool2_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 17 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-3.c
>> b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
>> new file mode 100644
>> index 00000000000..3af0513e006
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool4_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 16 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-4.c
>> b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
>> new file mode 100644
>> index 00000000000..ea3c360d756
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool8_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-5.c
>> b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
>> new file mode 100644
>> index 00000000000..9fc659d2402
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool16_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times 
>> +{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-6.c
>> b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
>> new file mode 100644
>> index 00000000000..98275e5267d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool32_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 13 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-7.c b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
>> new file mode 100644
>> index 00000000000..8f6f0b11f09
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
>> @@ -0,0 +1,68 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool64_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 6 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-8.c b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
>> new file mode 100644
>> index 00000000000..d96959dd064
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
>> @@ -0,0 +1,77 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
>> +
>> +#include "riscv_vector.h"
>> +
>> +void
>> +test_vbool1_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool1_t v1 = *(vbool1_t*)in;
>> +    vbool1_t v2 = *(vbool1_t*)in;
>> +
>> +    *(vbool1_t*)(out + 100) = v1;
>> +    *(vbool1_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool2_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool2_t v1 = *(vbool2_t*)in;
>> +    vbool2_t v2 = *(vbool2_t*)in;
>> +
>> +    *(vbool2_t*)(out + 100) = v1;
>> +    *(vbool2_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool4_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool4_t v1 = *(vbool4_t*)in;
>> +    vbool4_t v2 = *(vbool4_t*)in;
>> +
>> +    *(vbool4_t*)(out + 100) = v1;
>> +    *(vbool4_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool8_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool8_t v1 = *(vbool8_t*)in;
>> +    vbool8_t v2 = *(vbool8_t*)in;
>> +
>> +    *(vbool8_t*)(out + 100) = v1;
>> +    *(vbool8_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool16_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool16_t v1 = *(vbool16_t*)in;
>> +    vbool16_t v2 = *(vbool16_t*)in;
>> +
>> +    *(vbool16_t*)(out + 100) = v1;
>> +    *(vbool16_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool32_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool32_t v1 = *(vbool32_t*)in;
>> +    vbool32_t v2 = *(vbool32_t*)in;
>> +
>> +    *(vbool32_t*)(out + 100) = v1;
>> +    *(vbool32_t*)(out + 200) = v2;
>> +}
>> +
>> +void
>> +test_vbool64_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
>> +    vbool64_t v1 = *(vbool64_t*)in;
>> +    vbool64_t v2 = *(vbool64_t*)in;
>> +
>> +    *(vbool64_t*)(out + 100) = v1;
>> +    *(vbool64_t*)(out + 200) = v2;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
>> +/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
>> +/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
  

Patch

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index d5305efa8a6..110bddce851 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -72,6 +72,14 @@  ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
 
+ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1));
+ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2));
+ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4));
+ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8));
+ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 16));
+ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision (VNx32BImode, 32));
+ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64));
+
 /*
    | Mode        | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
    |             | LMUL        | SEW/LMUL    | LMUL        | SEW/LMUL    |
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f11b7949a49..ac5c2527fde 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1003,6 +1003,18 @@  riscv_v_adjust_nunits (machine_mode mode, int scale)
   return scale;
 }
 
+/* Called from ADJUST_PRECISION in riscv-modes.def.  Return SCALE times
+   riscv_vector_chunks if riscv_v_ext_vector_mode_p (MODE), else SCALE.  */
+
+poly_int64
+riscv_v_adjust_precision (machine_mode mode, int scale)
+{
+  if (riscv_v_ext_vector_mode_p (mode))
+    return riscv_vector_chunks * scale;
+
+  return scale;
+}
+
 /* Return true if X is a valid address for machine mode MODE.  If it is,
    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
    effect.  */
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 5bc7f2f467d..15b9317a8ce 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -1025,6 +1025,7 @@  extern unsigned riscv_stack_boundary;
 extern unsigned riscv_bytes_per_vector_chunk;
 extern poly_uint16 riscv_vector_chunks;
 extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int);
+extern poly_int64 riscv_v_adjust_precision (enum machine_mode, int);
 /* The number of bits and bytes in a RVV vector.  */
 #define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8))
 #define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk))
diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc
index 2d418f09aab..3452d8fb878 100644
--- a/gcc/genmodes.cc
+++ b/gcc/genmodes.cc
@@ -114,6 +114,7 @@  static struct mode_adjust *adj_alignment;
 static struct mode_adjust *adj_format;
 static struct mode_adjust *adj_ibit;
 static struct mode_adjust *adj_fbit;
+static struct mode_adjust *adj_precision;
 
 /* Mode class operations.  */
 static enum mode_class
@@ -819,6 +820,7 @@  make_vector_mode (enum mode_class bclass,
 #define ADJUST_NUNITS(M, X)    _ADD_ADJUST (nunits, M, X, RANDOM, RANDOM)
 #define ADJUST_BYTESIZE(M, X)  _ADD_ADJUST (bytesize, M, X, RANDOM, RANDOM)
 #define ADJUST_ALIGNMENT(M, X) _ADD_ADJUST (alignment, M, X, RANDOM, RANDOM)
+#define ADJUST_PRECISION(M, X) _ADD_ADJUST (precision, M, X, RANDOM, RANDOM)
 #define ADJUST_FLOAT_FORMAT(M, X)    _ADD_ADJUST (format, M, X, FLOAT, FLOAT)
 #define ADJUST_IBIT(M, X)  _ADD_ADJUST (ibit, M, X, ACCUM, UACCUM)
 #define ADJUST_FBIT(M, X)  _ADD_ADJUST (fbit, M, X, FRACT, UACCUM)
@@ -1829,8 +1831,9 @@  emit_mode_adjustments (void)
 	      " (mode_precision[E_%smode], mode_nunits[E_%smode]);\n",
 	      m->name, m->name);
       printf ("    mode_precision[E_%smode] = ps * old_factor;\n", m->name);
-      printf ("    mode_size[E_%smode] = exact_div (mode_precision[E_%smode],"
-	      " BITS_PER_UNIT);\n", m->name, m->name);
+      printf ("    if (!multiple_p (mode_precision[E_%smode],"
+	      " BITS_PER_UNIT, &mode_size[E_%smode]))\n", m->name, m->name);
+      printf ("      mode_size[E_%smode] = -1;\n", m->name);
       printf ("    mode_nunits[E_%smode] = ps;\n", m->name);
       printf ("    adjust_mode_mask (E_%smode);\n", m->name);
       printf ("  }\n");
@@ -1963,6 +1966,19 @@  emit_mode_adjustments (void)
     printf ("\n  /* %s:%d */\n  REAL_MODE_FORMAT (E_%smode) = %s;\n",
 	    a->file, a->line, a->mode->name, a->adjustment);
 
+  /* Adjust precision to the actual bit size.  */
+  for (a = adj_precision; a; a = a->next)
+    switch (a->mode->cl)
+      {
+	case MODE_VECTOR_BOOL:
+	  printf ("\n  /* %s:%d.  */\n  ps = %s;\n", a->file, a->line,
+		  a->adjustment);
+	  printf ("  mode_precision[E_%smode] = ps;\n", a->mode->name);
+	  break;
+	default:
+	  break;
+      }
+
   puts ("}");
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-1.c b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
new file mode 100644
index 00000000000..e70960c5b6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-1.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool1_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool1_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool1_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 18 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-2.c b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
new file mode 100644
index 00000000000..dcc7a644a88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-2.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool2_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool2_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool2_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 17 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-3.c b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
new file mode 100644
index 00000000000..3af0513e006
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-3.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool4_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool4_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool4_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 16 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-4.c b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
new file mode 100644
index 00000000000..ea3c360d756
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-4.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool8_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool8_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool8_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-5.c b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
new file mode 100644
index 00000000000..9fc659d2402
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-5.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool16_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+void
+test_vbool16_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool16_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-6.c b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
new file mode 100644
index 00000000000..98275e5267d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-6.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool32_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool32_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in;
+    vbool64_t v2 = *(vbool64_t*)in;
+
+    *(vbool32_t*)(out + 100) = v1;
+    *(vbool64_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 13 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-7.c b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
new file mode 100644
index 00000000000..8f6f0b11f09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-7.c
@@ -0,0 +1,68 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void
+test_vbool64_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool1_t v2 = *(vbool1_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool1_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool2_t v2 = *(vbool2_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool2_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool4_t v2 = *(vbool4_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool4_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool8_t v2 = *(vbool8_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool8_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool16_t v2 = *(vbool16_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool16_t*)(out + 200) = v2;
+}
+
+void
+test_vbool64_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in;
+    vbool32_t v2 = *(vbool32_t*)in;
+
+    *(vbool64_t*)(out + 100) = v1;
+    *(vbool32_t*)(out + 200) = v2;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr108185-8.c b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
new file mode 100644
index 00000000000..d96959dd064
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr108185-8.c
@@ -0,0 +1,77 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+#include "riscv_vector.h"
+
+void /* Same-type case: the address in is read twice as vbool1_t.  */
+test_vbool1_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
+    vbool1_t v1 = *(vbool1_t*)in; /* First read of in.  */
+    vbool1_t v2 = *(vbool1_t*)in; /* Second read: same address, same type.  */
+
+    *(vbool1_t*)(out + 100) = v1; /* Both values are stored, at out + 100 and out + 200.  */
+    *(vbool1_t*)(out + 200) = v2; /* This file's dg-final scans expect 7 vlm.v and 14 vsm.v in total.  */
+}
+
+void /* Same-type case: the address in is read twice as vbool2_t.  */
+test_vbool2_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
+    vbool2_t v1 = *(vbool2_t*)in; /* First read of in.  */
+    vbool2_t v2 = *(vbool2_t*)in; /* Second read: same address, same type.  */
+
+    *(vbool2_t*)(out + 100) = v1; /* Both values are stored, at out + 100 and out + 200.  */
+    *(vbool2_t*)(out + 200) = v2; /* This file's dg-final scans expect 7 vlm.v and 14 vsm.v in total.  */
+}
+
+void /* Same-type case: the address in is read twice as vbool4_t.  */
+test_vbool4_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
+    vbool4_t v1 = *(vbool4_t*)in; /* First read of in.  */
+    vbool4_t v2 = *(vbool4_t*)in; /* Second read: same address, same type.  */
+
+    *(vbool4_t*)(out + 100) = v1; /* Both values are stored, at out + 100 and out + 200.  */
+    *(vbool4_t*)(out + 200) = v2; /* This file's dg-final scans expect 7 vlm.v and 14 vsm.v in total.  */
+}
+
+void /* Same-type case: the address in is read twice as vbool8_t.  */
+test_vbool8_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
+    vbool8_t v1 = *(vbool8_t*)in; /* First read of in.  */
+    vbool8_t v2 = *(vbool8_t*)in; /* Second read: same address, same type.  */
+
+    *(vbool8_t*)(out + 100) = v1; /* Both values are stored, at out + 100 and out + 200.  */
+    *(vbool8_t*)(out + 200) = v2; /* This file's dg-final scans expect 7 vlm.v and 14 vsm.v in total.  */
+}
+
+void /* Same-type case: the address in is read twice as vbool16_t.  */
+test_vbool16_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
+    vbool16_t v1 = *(vbool16_t*)in; /* First read of in.  */
+    vbool16_t v2 = *(vbool16_t*)in; /* Second read: same address, same type.  */
+
+    *(vbool16_t*)(out + 100) = v1; /* Both values are stored, at out + 100 and out + 200.  */
+    *(vbool16_t*)(out + 200) = v2; /* This file's dg-final scans expect 7 vlm.v and 14 vsm.v in total.  */
+}
+
+void /* Same-type case: the address in is read twice as vbool32_t.  */
+test_vbool32_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
+    vbool32_t v1 = *(vbool32_t*)in; /* First read of in.  */
+    vbool32_t v2 = *(vbool32_t*)in; /* Second read: same address, same type.  */
+
+    *(vbool32_t*)(out + 100) = v1; /* Both values are stored, at out + 100 and out + 200.  */
+    *(vbool32_t*)(out + 200) = v2; /* This file's dg-final scans expect 7 vlm.v and 14 vsm.v in total.  */
+}
+
+void /* Same-type case: the address in is read twice as vbool64_t.  */
+test_vbool64_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
+    vbool64_t v1 = *(vbool64_t*)in; /* First read of in.  */
+    vbool64_t v2 = *(vbool64_t*)in; /* Second read: same address, same type.  */
+
+    *(vbool64_t*)(out + 100) = v1; /* Both values are stored, at out + 100 and out + 200.  */
+    *(vbool64_t*)(out + 200) = v2; /* This file's dg-final scans expect 7 vlm.v and 14 vsm.v in total.  */
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */