[2/6] aarch64: Allow moves after tied-register intrinsics

Message ID 20230509064831.1651327-3-richard.sandiford@arm.com
Series aarch64: Avoid hard-coding specific register allocations

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Richard Sandiford May 9, 2023, 6:48 a.m. UTC
  Some ACLE intrinsics map to instructions that tie the output
operand to an input operand.  If all the operands are allocated
to different registers, and if MOVPRFX can't be used, we will need
a move either before the instruction or after it.  Many tests only
matched the "before" case; this patch makes them accept the "after"
case too.

gcc/testsuite/
	* gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: Allow
	moves to occur after the intrinsic instruction, rather than requiring
	them to happen before.
	* gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/adda_f16.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/adda_f32.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/adda_f64.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/brka_b.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/brkb_b.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/brkn_b.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/clasta_bf16.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/clasta_f16.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/clasta_f32.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/clasta_f64.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/clastb_bf16.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/clastb_f16.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/clastb_f32.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/clastb_f64.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/pfirst_b.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/pnext_b16.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/pnext_b32.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/pnext_b64.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/pnext_b8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sli_s16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sli_s32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sli_s64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sli_s8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sli_u16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sli_u32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sli_u64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sli_u8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sri_s16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sri_s32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sri_s64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sri_s8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sri_u16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sri_u32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sri_u64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/sri_u8.c: Likewise.
---
 .../aarch64/advsimd-intrinsics/bfcvtnq2-untied.c  |  5 +++++
 .../aarch64/advsimd-intrinsics/bfdot-1.c          | 10 ++++++++++
 .../aarch64/advsimd-intrinsics/vdot-3-1.c         | 10 ++++++++++
 .../gcc.target/aarch64/sve/acle/asm/adda_f16.c    |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/adda_f32.c    |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/adda_f64.c    |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/brka_b.c      |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/brkb_b.c      |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/brkn_b.c      |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/clasta_bf16.c |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/clasta_f16.c  |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/clasta_f32.c  |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/clasta_f64.c  |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/clastb_bf16.c |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/clastb_f16.c  |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/clastb_f32.c  |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/clastb_f64.c  |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/pfirst_b.c    |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/pnext_b16.c   |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/pnext_b32.c   |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/pnext_b64.c   |  5 +++++
 .../gcc.target/aarch64/sve/acle/asm/pnext_b8.c    |  5 +++++
 .../gcc.target/aarch64/sve2/acle/asm/sli_s16.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sli_s32.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sli_s64.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sli_s8.c     | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sli_u16.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sli_u32.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sli_u64.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sli_u8.c     | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sri_s16.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sri_s32.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sri_s64.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sri_s8.c     | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sri_u16.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sri_u32.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sri_u64.c    | 15 +++++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/sri_u8.c     | 15 +++++++++++++++
 38 files changed, 360 insertions(+)
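
To make the shape of the change concrete: every adjusted test uses the check-function-bodies "( ... | ... )" grouping so that either ordering is accepted.  A minimal sketch follows (register choices copied from the bfcvtnq2-untied.c hunk quoted in the review below; it is an illustration of the pattern, not an extra hunk):

/*
**foo_untied:
** (
**	mov	v0.16b, v1.16b
**	bfcvtn2	v0.8h, v2.4s
** |
**	bfcvtn2	v1.8h, v2.4s
**	mov	v0.16b, v1.16b
** )
**	ret
*/

The first alternative matches the "before" allocation (copy the tied input into the result register v0, then operate in place); the second matches the "after" allocation (operate on v1, then move the result into v0 before returning).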
  

Comments

Kyrylo Tkachov May 15, 2023, 2:04 p.m. UTC | #1
Hi Richard,

> -----Original Message-----
> From: Gcc-patches <gcc-patches-
> bounces+kyrylo.tkachov=arm.com@gcc.gnu.org> On Behalf Of Richard
> Sandiford via Gcc-patches
> Sent: Tuesday, May 9, 2023 7:48 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Sandiford <Richard.Sandiford@arm.com>
> Subject: [PATCH 2/6] aarch64: Allow moves after tied-register intrinsics
> 
> Some ACLE intrinsics map to instructions that tie the output
> operand to an input operand.  If all the operands are allocated
> to different registers, and if MOVPRFX can't be used, we will need
> a move either before the instruction or after it.  Many tests only
> matched the "before" case; this patch makes them accept the "after"
> case too.
> 
> gcc/testsuite/
> 	* gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: Allow
> 	moves to occur after the intrinsic instruction, rather than requiring
> 	them to happen before.
> 	* gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c: Likewise.
> 	* gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c: Likewise.

I'm seeing some dot-product intrinsics failures:
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O1   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O1   check-function-bodies ufooq_lane_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2   check-function-bodies ufooq_lane_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufooq_lane_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O3 -g   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O3 -g   check-function-bodies ufooq_lane_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Og -g   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Og -g   check-function-bodies ufooq_lane_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Os   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Os   check-function-bodies ufooq_lane_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O1   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O1   check-function-bodies ufooq_laneq_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2   check-function-bodies ufooq_laneq_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufooq_laneq_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O3 -g   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O3 -g   check-function-bodies ufooq_laneq_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Og -g   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Og -g   check-function-bodies ufooq_laneq_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Os   check-function-bodies ufoo_untied
FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Os   check-function-bodies ufooq_laneq_untied

From a quick inspection it looks like it's just an alternative regalloc that moves the mov + dot instructions around, similar to what you fixed in bfdot-2.c and vdot-3-2.c.
I guess they need a similar adjustment?
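Something of this shape for ufoo_untied, say (lifted from the bfdot-1.c hunk in the patch, so the exact registers and operand order may differ and would need checking against bfdot-2.c's actual output):

** (
**	mov	v0.8b, v1.8b
**	bfdot	v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h)
** |
**	bfdot	v1.2s, (v2.4h, v3.4h|v3.4h, v2.4h)
**	mov	v0.8b, v1.8b
** )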
Thanks,
Kyrill

> 	* gcc.target/aarch64/sve/acle/asm/adda_f16.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/adda_f32.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/adda_f64.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/brka_b.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/brkb_b.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/brkn_b.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/clasta_bf16.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/clasta_f16.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/clasta_f32.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/clasta_f64.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/clastb_bf16.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/clastb_f16.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/clastb_f32.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/clastb_f64.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/pfirst_b.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/pnext_b16.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/pnext_b32.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/pnext_b64.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/asm/pnext_b8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sli_s16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sli_s32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sli_s64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sli_s8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sli_u16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sli_u32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sli_u64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sli_u8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sri_s16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sri_s32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sri_s64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sri_s8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sri_u16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sri_u32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sri_u64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/sri_u8.c: Likewise.
> ---
>  .../aarch64/advsimd-intrinsics/bfcvtnq2-untied.c  |  5 +++++
>  .../aarch64/advsimd-intrinsics/bfdot-1.c          | 10 ++++++++++
>  .../aarch64/advsimd-intrinsics/vdot-3-1.c         | 10 ++++++++++
>  .../gcc.target/aarch64/sve/acle/asm/adda_f16.c    |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/adda_f32.c    |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/adda_f64.c    |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/brka_b.c      |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/brkb_b.c      |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/brkn_b.c      |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/clasta_bf16.c |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/clasta_f16.c  |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/clasta_f32.c  |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/clasta_f64.c  |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/clastb_bf16.c |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/clastb_f16.c  |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/clastb_f32.c  |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/clastb_f64.c  |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/pfirst_b.c    |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/pnext_b16.c   |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/pnext_b32.c   |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/pnext_b64.c   |  5 +++++
>  .../gcc.target/aarch64/sve/acle/asm/pnext_b8.c    |  5 +++++
>  .../gcc.target/aarch64/sve2/acle/asm/sli_s16.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sli_s32.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sli_s64.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sli_s8.c     | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sli_u16.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sli_u32.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sli_u64.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sli_u8.c     | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sri_s16.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sri_s32.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sri_s64.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sri_s8.c     | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sri_u16.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sri_u32.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sri_u64.c    | 15 +++++++++++++++
>  .../gcc.target/aarch64/sve2/acle/asm/sri_u8.c     | 15 +++++++++++++++
>  38 files changed, 360 insertions(+)
> 
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-
> untied.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-
> untied.c
> index 4b730e39d4e..1143bb797bc 100644
> --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
> @@ -9,8 +9,13 @@
> 
>  /*
>  **test_bfcvtnq2_untied:
> +** (
>  **     mov	v0.16b, v1.16b
>  **     bfcvtn2	v0.8h, v2.4s
> +** |
> +**     bfcvtn2	v1.8h, v2.4s
> +**     mov	v0.16b, v1.16b
> +** )
>  **     ret
>  */
>  bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t
> inactive,
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c
> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c
> index ad51507731b..a5baf57cd5c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c
> @@ -69,8 +69,13 @@ float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x,
> bfloat16x4_t y)
> 
>  /*
>  **ufoo_untied:
> +** (
>  **	mov	v0.8b, v1.8b
>  **	bfdot	v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h)
> +** |
> +**	bfdot	v1.2s, (v2.4h, v3.4h|v3.4h, v2.4h)
> +**	mov	v0.8b, v1.8b
> +** )
>  **	ret
>  */
>  float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, bfloat16x4_t x,
> bfloat16x4_t y)
> @@ -80,8 +85,13 @@ float32x2_t ufoo_untied(float32x4_t unused,
> float32x2_t r, bfloat16x4_t x, bfloa
> 
>  /*
>  **ufooq_lane_untied:
> +** (
>  **	mov	v0.16b, v1.16b
>  **	bfdot	v0.4s, v2.8h, v3.2h\[1\]
> +** |
> +**	bfdot	v1.4s, v2.8h, v3.2h\[1\]
> +**	mov	v0.16b, v1.16b
> +** )
>  **	ret
>  */
>  float32x4_t ufooq_lane_untied(float32x4_t unused, float32x4_t r,
> bfloat16x8_t x, bfloat16x4_t y)
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c
> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c
> index ac4f821e771..a245b9f792a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c
> @@ -114,8 +114,13 @@ int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x,
> uint8x16_t y)
> 
>  /*
>  **ufoo_untied:
> +** (
>  **	mov	v0\.8b, v1\.8b
>  **	usdot	v0\.2s, v2\.8b, v3\.8b
> +** |
> +**	usdot	v1\.2s, v2\.8b, v3\.8b
> +**	mov	v0\.8b, v1\.8b
> +** )
>  **	ret
>  */
>  int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
> @@ -125,8 +130,13 @@ int32x2_t ufoo_untied (int32x2_t unused, int32x2_t
> r, uint8x8_t x, int8x8_t y)
> 
>  /*
>  **ufooq_laneq_untied:
> +** (
>  **	mov	v0\.16b, v1\.16b
>  **	usdot	v0\.4s, v2\.16b, v3\.4b\[3\]
> +** |
> +**	usdot	v1\.4s, v2\.16b, v3\.4b\[3\]
> +**	mov	v0\.16b, v1\.16b
> +** )
>  **	ret
>  */
>  int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x,
> int8x16_t y)
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
> index 6c6bfa1c294..642c45ab492 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
> @@ -13,8 +13,13 @@ TEST_FOLD_LEFT_D (adda_d0_f16, float16_t,
> svfloat16_t,
> 
>  /*
>  ** adda_d1_f16:
> +** (
>  **	mov	v0\.h\[0\], v1\.h\[0\]
>  **	fadda	h0, p0, h0, z2\.h
> +** |
> +**	fadda	h1, p0, h1, z2\.h
> +**	mov	v0\.h\[0\], v1\.h\[0\]
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (adda_d1_f16, float16_t, svfloat16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c
> index 8b2a1dd1c68..79bdd3d8048 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c
> @@ -13,8 +13,13 @@ TEST_FOLD_LEFT_D (adda_d0_f32, float32_t,
> svfloat32_t,
> 
>  /*
>  ** adda_d1_f32:
> +** (
>  **	fmov	s0, s1
>  **	fadda	s0, p0, s0, z2\.s
> +** |
> +**	fadda	s1, p0, s1, z2\.s
> +**	fmov	s0, s1
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (adda_d1_f32, float32_t, svfloat32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c
> index 90a56420a6a..c8f56772218 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c
> @@ -13,8 +13,13 @@ TEST_FOLD_LEFT_D (adda_d0_f64, float64_t,
> svfloat64_t,
> 
>  /*
>  ** adda_d1_f64:
> +** (
>  **	fmov	d0, d1
>  **	fadda	d0, p0, d0, z2\.d
> +** |
> +**	fadda	d1, p0, d1, z2\.d
> +**	fmov	d0, d1
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (adda_d1_f64, float64_t, svfloat64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c
> index 63426cf947d..7a20a22d128 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c
> @@ -27,8 +27,13 @@ TEST_UNIFORM_P (brka_b_m_tied2,
> 
>  /*
>  ** brka_b_m_untied:
> +** (
>  **	mov	p0\.b, p2\.b
>  **	brka	p0\.b, p3/m, p1\.b
> +** |
> +**	brka	p2\.b, p3/m, p1\.b
> +**	mov	p0\.b, p2\.b
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_P (brka_b_m_untied,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c
> index 4f9a2c2d7b9..f1c8c436863 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c
> @@ -27,8 +27,13 @@ TEST_UNIFORM_P (brkb_b_m_tied2,
> 
>  /*
>  ** brkb_b_m_untied:
> +** (
>  **	mov	p0\.b, p2\.b
>  **	brkb	p0\.b, p3/m, p1\.b
> +** |
> +**	brkb	p2\.b, p3/m, p1\.b
> +**	mov	p0\.b, p2\.b
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_P (brkb_b_m_untied,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c
> index 229a5fff9eb..69e8eb6b0e5 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_P (brkn_b_z_tied2,
> 
>  /*
>  ** brkn_b_z_untied:
> +** (
>  **	mov	p0\.b, p2\.b
>  **	brkn	p0\.b, p3/z, p1\.b, p0\.b
> +** |
> +**	brkn	p2\.b, p3/z, p1\.b, p2\.b
> +**	mov	p0\.b, p2\.b
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_P (brkn_b_z_untied,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c
> index a15e34400f6..54a1d1af178 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c
> @@ -43,8 +43,13 @@ TEST_FOLD_LEFT_D (clasta_d0_bf16, bfloat16_t,
> svbfloat16_t,
> 
>  /*
>  ** clasta_d1_bf16:
> +** (
>  **	mov	v0\.h\[0\], v1\.h\[0\]
>  **	clasta	h0, p0, h0, z2\.h
> +** |
> +**	clasta	h1, p0, h1, z2\.h
> +**	mov	v0\.h\[0\], v1\.h\[0\]
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (clasta_d1_bf16, bfloat16_t, svbfloat16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c
> index d9a980f60c0..243cad40f56 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c
> @@ -43,8 +43,13 @@ TEST_FOLD_LEFT_D (clasta_d0_f16, float16_t,
> svfloat16_t,
> 
>  /*
>  ** clasta_d1_f16:
> +** (
>  **	mov	v0\.h\[0\], v1\.h\[0\]
>  **	clasta	h0, p0, h0, z2\.h
> +** |
> +**	clasta	h1, p0, h1, z2\.h
> +**	mov	v0\.h\[0\], v1\.h\[0\]
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (clasta_d1_f16, float16_t, svfloat16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c
> index cac01fa6d64..44e700ada9a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c
> @@ -43,8 +43,13 @@ TEST_FOLD_LEFT_D (clasta_d0_f32, float32_t,
> svfloat32_t,
> 
>  /*
>  ** clasta_d1_f32:
> +** (
>  **	fmov	s0, s1
>  **	clasta	s0, p0, s0, z2\.s
> +** |
> +**	clasta	s1, p0, s1, z2\.s
> +**	fmov	s0, s1
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (clasta_d1_f32, float32_t, svfloat32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c
> index 43b93553ba8..fb147d51f0c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c
> @@ -43,8 +43,13 @@ TEST_FOLD_LEFT_D (clasta_d0_f64, float64_t,
> svfloat64_t,
> 
>  /*
>  ** clasta_d1_f64:
> +** (
>  **	fmov	d0, d1
>  **	clasta	d0, p0, d0, z2\.d
> +** |
> +**	clasta	d1, p0, d1, z2\.d
> +**	fmov	d0, d1
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (clasta_d1_f64, float64_t, svfloat64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c
> index 235fd1b4ed6..8dcb9a152b5 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c
> @@ -43,8 +43,13 @@ TEST_FOLD_LEFT_D (clastb_d0_bf16, bfloat16_t,
> svbfloat16_t,
> 
>  /*
>  ** clastb_d1_bf16:
> +** (
>  **	mov	v0\.h\[0\], v1\.h\[0\]
>  **	clastb	h0, p0, h0, z2\.h
> +** |
> +**	clastb	h1, p0, h1, z2\.h
> +**	mov	v0\.h\[0\], v1\.h\[0\]
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (clastb_d1_bf16, bfloat16_t, svbfloat16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c
> index e56d7688a1c..08e63cee9e8 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c
> @@ -43,8 +43,13 @@ TEST_FOLD_LEFT_D (clastb_d0_f16, float16_t,
> svfloat16_t,
> 
>  /*
>  ** clastb_d1_f16:
> +** (
>  **	mov	v0\.h\[0\], v1\.h\[0\]
>  **	clastb	h0, p0, h0, z2\.h
> +** |
> +**	clastb	h1, p0, h1, z2\.h
> +**	mov	v0\.h\[0\], v1\.h\[0\]
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (clastb_d1_f16, float16_t, svfloat16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c
> index c580d13064b..8d71344b2ce 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c
> @@ -43,8 +43,13 @@ TEST_FOLD_LEFT_D (clastb_d0_f32, float32_t,
> svfloat32_t,
> 
>  /*
>  ** clastb_d1_f32:
> +** (
>  **	fmov	s0, s1
>  **	clastb	s0, p0, s0, z2\.s
> +** |
> +**	clastb	s1, p0, s1, z2\.s
> +**	fmov	s0, s1
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (clastb_d1_f32, float32_t, svfloat32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c
> index 217a76f5112..6b24dcad17e 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c
> @@ -43,8 +43,13 @@ TEST_FOLD_LEFT_D (clastb_d0_f64, float64_t,
> svfloat64_t,
> 
>  /*
>  ** clastb_d1_f64:
> +** (
>  **	fmov	d0, d1
>  **	clastb	d0, p0, d0, z2\.d
> +** |
> +**	clastb	d1, p0, d1, z2\.d
> +**	fmov	d0, d1
> +** )
>  **	ret
>  */
>  TEST_FOLD_LEFT_D (clastb_d1_f64, float64_t, svfloat64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c
> index a32099656cc..9ec5f00c7f1 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c
> @@ -13,8 +13,13 @@ TEST_UNIFORM_P (pfirst_b_tied1,
> 
>  /*
>  ** pfirst_b_untied:
> +** (
>  **	mov	p0\.b, p1\.b
>  **	pfirst	p0\.b, p3, p0\.b
> +** |
> +**	pfirst	p1\.b, p3, p1\.b
> +**	mov	p0\.b, p1\.b
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_P (pfirst_b_untied,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c
> index ad0efe5e711..efb76e8ba8a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c
> @@ -13,8 +13,13 @@ TEST_UNIFORM_P (pnext_b16_tied1,
> 
>  /*
>  ** pnext_b16_untied:
> +** (
>  **	mov	p0\.b, p1\.b
>  **	pnext	p0\.h, p3, p0\.h
> +** |
> +**	pnext	p1\.h, p3, p1\.h
> +**	mov	p0\.b, p1\.b
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_P (pnext_b16_untied,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c
> index a0030fae18d..1f57253fbf1 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c
> @@ -13,8 +13,13 @@ TEST_UNIFORM_P (pnext_b32_tied1,
> 
>  /*
>  ** pnext_b32_untied:
> +** (
>  **	mov	p0\.b, p1\.b
>  **	pnext	p0\.s, p3, p0\.s
> +** |
> +**	pnext	p1\.s, p3, p1\.s
> +**	mov	p0\.b, p1\.b
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_P (pnext_b32_untied,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c
> index 59db2f04f2a..eed5a56f134 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c
> @@ -13,8 +13,13 @@ TEST_UNIFORM_P (pnext_b64_tied1,
> 
>  /*
>  ** pnext_b64_untied:
> +** (
>  **	mov	p0\.b, p1\.b
>  **	pnext	p0\.d, p3, p0\.d
> +** |
> +**	pnext	p1\.d, p3, p1\.d
> +**	mov	p0\.b, p1\.b
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_P (pnext_b64_untied,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c
> index cfc2e907c25..a36d43c4cff 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c
> @@ -13,8 +13,13 @@ TEST_UNIFORM_P (pnext_b8_tied1,
> 
>  /*
>  ** pnext_b8_untied:
> +** (
>  **	mov	p0\.b, p1\.b
>  **	pnext	p0\.b, p3, p0\.b
> +** |
> +**	pnext	p1\.b, p3, p1\.b
> +**	mov	p0\.b, p1\.b
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_P (pnext_b8_untied,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s16.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s16.c
> index 6772a5620d2..d91d499da5a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s16.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sli_0_s16_tied2, svint16_t,
> 
>  /*
>  ** sli_0_s16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.h, z2\.h, #0
> +** |
> +**	sli	z1\.h, z2\.h, #0
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_0_s16_untied, svint16_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sli_1_s16_tied2, svint16_t,
> 
>  /*
>  ** sli_1_s16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.h, z2\.h, #1
> +** |
> +**	sli	z1\.h, z2\.h, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_1_s16_untied, svint16_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sli_15_s16_tied2, svint16_t,
> 
>  /*
>  ** sli_15_s16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.h, z2\.h, #15
> +** |
> +**	sli	z1\.h, z2\.h, #15
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_15_s16_untied, svint16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s32.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s32.c
> index 023e7c40d3e..3ae507c432c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s32.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sli_0_s32_tied2, svint32_t,
> 
>  /*
>  ** sli_0_s32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.s, z2\.s, #0
> +** |
> +**	sli	z1\.s, z2\.s, #0
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_0_s32_untied, svint32_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sli_1_s32_tied2, svint32_t,
> 
>  /*
>  ** sli_1_s32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.s, z2\.s, #1
> +** |
> +**	sli	z1\.s, z2\.s, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_1_s32_untied, svint32_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sli_31_s32_tied2, svint32_t,
> 
>  /*
>  ** sli_31_s32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.s, z2\.s, #31
> +** |
> +**	sli	z1\.s, z2\.s, #31
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_31_s32_untied, svint32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s64.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s64.c
> index c37db1b4796..93c5723a804 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s64.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sli_0_s64_tied2, svint64_t,
> 
>  /*
>  ** sli_0_s64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.d, z2\.d, #0
> +** |
> +**	sli	z1\.d, z2\.d, #0
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_0_s64_untied, svint64_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sli_1_s64_tied2, svint64_t,
> 
>  /*
>  ** sli_1_s64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.d, z2\.d, #1
> +** |
> +**	sli	z1\.d, z2\.d, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_1_s64_untied, svint64_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sli_63_s64_tied2, svint64_t,
> 
>  /*
>  ** sli_63_s64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.d, z2\.d, #63
> +** |
> +**	sli	z1\.d, z2\.d, #63
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_63_s64_untied, svint64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s8.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s8.c
> index ea0dcdc1871..5ac336f76d1 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s8.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sli_0_s8_tied2, svint8_t,
> 
>  /*
>  ** sli_0_s8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.b, z2\.b, #0
> +** |
> +**	sli	z1\.b, z2\.b, #0
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_0_s8_untied, svint8_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sli_1_s8_tied2, svint8_t,
> 
>  /*
>  ** sli_1_s8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.b, z2\.b, #1
> +** |
> +**	sli	z1\.b, z2\.b, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_1_s8_untied, svint8_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sli_7_s8_tied2, svint8_t,
> 
>  /*
>  ** sli_7_s8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.b, z2\.b, #7
> +** |
> +**	sli	z1\.b, z2\.b, #7
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_7_s8_untied, svint8_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u16.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u16.c
> index 475c00ea6a4..b6cbb55b8a0 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u16.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sli_0_u16_tied2, svuint16_t,
> 
>  /*
>  ** sli_0_u16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.h, z2\.h, #0
> +** |
> +**	sli	z1\.h, z2\.h, #0
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_0_u16_untied, svuint16_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sli_1_u16_tied2, svuint16_t,
> 
>  /*
>  ** sli_1_u16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.h, z2\.h, #1
> +** |
> +**	sli	z1\.h, z2\.h, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_1_u16_untied, svuint16_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sli_15_u16_tied2, svuint16_t,
> 
>  /*
>  ** sli_15_u16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.h, z2\.h, #15
> +** |
> +**	sli	z1\.h, z2\.h, #15
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_15_u16_untied, svuint16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u32.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u32.c
> index 52bd8370e5f..654f4b6d670 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u32.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sli_0_u32_tied2, svuint32_t,
> 
>  /*
>  ** sli_0_u32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.s, z2\.s, #0
> +** |
> +**	sli	z1\.s, z2\.s, #0
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_0_u32_untied, svuint32_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sli_1_u32_tied2, svuint32_t,
> 
>  /*
>  ** sli_1_u32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.s, z2\.s, #1
> +** |
> +**	sli	z1\.s, z2\.s, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_1_u32_untied, svuint32_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sli_31_u32_tied2, svuint32_t,
> 
>  /*
>  ** sli_31_u32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.s, z2\.s, #31
> +** |
> +**	sli	z1\.s, z2\.s, #31
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_31_u32_untied, svuint32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u64.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u64.c
> index ab75ba2e6d5..c5466a295cc 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u64.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sli_0_u64_tied2, svuint64_t,
> 
>  /*
>  ** sli_0_u64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.d, z2\.d, #0
> +** |
> +**	sli	z1\.d, z2\.d, #0
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_0_u64_untied, svuint64_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sli_1_u64_tied2, svuint64_t,
> 
>  /*
>  ** sli_1_u64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.d, z2\.d, #1
> +** |
> +**	sli	z1\.d, z2\.d, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_1_u64_untied, svuint64_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sli_63_u64_tied2, svuint64_t,
> 
>  /*
>  ** sli_63_u64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.d, z2\.d, #63
> +** |
> +**	sli	z1\.d, z2\.d, #63
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_63_u64_untied, svuint64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u8.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u8.c
> index e2207c3c466..2b3533f5e71 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u8.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sli_0_u8_tied2, svuint8_t,
> 
>  /*
>  ** sli_0_u8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.b, z2\.b, #0
> +** |
> +**	sli	z1\.b, z2\.b, #0
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_0_u8_untied, svuint8_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sli_1_u8_tied2, svuint8_t,
> 
>  /*
>  ** sli_1_u8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.b, z2\.b, #1
> +** |
> +**	sli	z1\.b, z2\.b, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_1_u8_untied, svuint8_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sli_7_u8_tied2, svuint8_t,
> 
>  /*
>  ** sli_7_u8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sli	z0\.b, z2\.b, #7
> +** |
> +**	sli	z1\.b, z2\.b, #7
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sli_7_u8_untied, svuint8_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s16.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s16.c
> index 177fbb20d62..16cb73ce542 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s16.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sri_1_s16_tied2, svint16_t,
> 
>  /*
>  ** sri_1_s16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.h, z2\.h, #1
> +** |
> +**	sri	z1\.h, z2\.h, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_1_s16_untied, svint16_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sri_2_s16_tied2, svint16_t,
> 
>  /*
>  ** sri_2_s16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.h, z2\.h, #2
> +** |
> +**	sri	z1\.h, z2\.h, #2
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_2_s16_untied, svint16_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sri_16_s16_tied2, svint16_t,
> 
>  /*
>  ** sri_16_s16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.h, z2\.h, #16
> +** |
> +**	sri	z1\.h, z2\.h, #16
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_16_s16_untied, svint16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s32.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s32.c
> index 27d6c99c3a0..3c69f622d74 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s32.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sri_1_s32_tied2, svint32_t,
> 
>  /*
>  ** sri_1_s32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.s, z2\.s, #1
> +** |
> +**	sri	z1\.s, z2\.s, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_1_s32_untied, svint32_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sri_2_s32_tied2, svint32_t,
> 
>  /*
>  ** sri_2_s32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.s, z2\.s, #2
> +** |
> +**	sri	z1\.s, z2\.s, #2
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_2_s32_untied, svint32_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sri_32_s32_tied2, svint32_t,
> 
>  /*
>  ** sri_32_s32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.s, z2\.s, #32
> +** |
> +**	sri	z1\.s, z2\.s, #32
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_32_s32_untied, svint32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s64.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s64.c
> index 021613d0179..5c64e1bb51f 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s64.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sri_1_s64_tied2, svint64_t,
> 
>  /*
>  ** sri_1_s64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.d, z2\.d, #1
> +** |
> +**	sri	z1\.d, z2\.d, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_1_s64_untied, svint64_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sri_2_s64_tied2, svint64_t,
> 
>  /*
>  ** sri_2_s64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.d, z2\.d, #2
> +** |
> +**	sri	z1\.d, z2\.d, #2
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_2_s64_untied, svint64_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sri_64_s64_tied2, svint64_t,
> 
>  /*
>  ** sri_64_s64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.d, z2\.d, #64
> +** |
> +**	sri	z1\.d, z2\.d, #64
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_64_s64_untied, svint64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s8.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s8.c
> index 0bfa2678559..1871bb47645 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s8.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sri_1_s8_tied2, svint8_t,
> 
>  /*
>  ** sri_1_s8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.b, z2\.b, #1
> +** |
> +**	sri	z1\.b, z2\.b, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_1_s8_untied, svint8_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sri_2_s8_tied2, svint8_t,
> 
>  /*
>  ** sri_2_s8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.b, z2\.b, #2
> +** |
> +**	sri	z1\.b, z2\.b, #2
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_2_s8_untied, svint8_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sri_8_s8_tied2, svint8_t,
> 
>  /*
>  ** sri_8_s8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.b, z2\.b, #8
> +** |
> +**	sri	z1\.b, z2\.b, #8
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_8_s8_untied, svint8_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u16.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u16.c
> index 2f12dc90857..ce6e838f7db 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u16.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sri_1_u16_tied2, svuint16_t,
> 
>  /*
>  ** sri_1_u16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.h, z2\.h, #1
> +** |
> +**	sri	z1\.h, z2\.h, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_1_u16_untied, svuint16_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sri_2_u16_tied2, svuint16_t,
> 
>  /*
>  ** sri_2_u16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.h, z2\.h, #2
> +** |
> +**	sri	z1\.h, z2\.h, #2
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_2_u16_untied, svuint16_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sri_16_u16_tied2, svuint16_t,
> 
>  /*
>  ** sri_16_u16_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.h, z2\.h, #16
> +** |
> +**	sri	z1\.h, z2\.h, #16
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_16_u16_untied, svuint16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u32.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u32.c
> index d4d107f55cc..7cf6fea771b 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u32.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sri_1_u32_tied2, svuint32_t,
> 
>  /*
>  ** sri_1_u32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.s, z2\.s, #1
> +** |
> +**	sri	z1\.s, z2\.s, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_1_u32_untied, svuint32_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sri_2_u32_tied2, svuint32_t,
> 
>  /*
>  ** sri_2_u32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.s, z2\.s, #2
> +** |
> +**	sri	z1\.s, z2\.s, #2
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_2_u32_untied, svuint32_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sri_32_u32_tied2, svuint32_t,
> 
>  /*
>  ** sri_32_u32_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.s, z2\.s, #32
> +** |
> +**	sri	z1\.s, z2\.s, #32
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_32_u32_untied, svuint32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u64.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u64.c
> index 41d67346f25..be61f85f265 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u64.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sri_1_u64_tied2, svuint64_t,
> 
>  /*
>  ** sri_1_u64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.d, z2\.d, #1
> +** |
> +**	sri	z1\.d, z2\.d, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_1_u64_untied, svuint64_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sri_2_u64_tied2, svuint64_t,
> 
>  /*
>  ** sri_2_u64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.d, z2\.d, #2
> +** |
> +**	sri	z1\.d, z2\.d, #2
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_2_u64_untied, svuint64_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sri_64_u64_tied2, svuint64_t,
> 
>  /*
>  ** sri_64_u64_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.d, z2\.d, #64
> +** |
> +**	sri	z1\.d, z2\.d, #64
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_64_u64_untied, svuint64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u8.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u8.c
> index 0aa6a543860..84de5a2b2e9 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u8.c
> @@ -18,8 +18,13 @@ TEST_UNIFORM_Z (sri_1_u8_tied2, svuint8_t,
> 
>  /*
>  ** sri_1_u8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.b, z2\.b, #1
> +** |
> +**	sri	z1\.b, z2\.b, #1
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_1_u8_untied, svuint8_t,
> @@ -42,8 +47,13 @@ TEST_UNIFORM_Z (sri_2_u8_tied2, svuint8_t,
> 
>  /*
>  ** sri_2_u8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.b, z2\.b, #2
> +** |
> +**	sri	z1\.b, z2\.b, #2
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_2_u8_untied, svuint8_t,
> @@ -66,8 +76,13 @@ TEST_UNIFORM_Z (sri_8_u8_tied2, svuint8_t,
> 
>  /*
>  ** sri_8_u8_untied:
> +** (
>  **	mov	z0\.d, z1\.d
>  **	sri	z0\.b, z2\.b, #8
> +** |
> +**	sri	z1\.b, z2\.b, #8
> +**	mov	z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (sri_8_u8_untied, svuint8_t,
> --
> 2.25.1
  
Richard Sandiford May 15, 2023, 2:18 p.m. UTC | #2
Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> writes:
> Hi Richard,
>
>> -----Original Message-----
>> From: Gcc-patches <gcc-patches-
>> bounces+kyrylo.tkachov=arm.com@gcc.gnu.org> On Behalf Of Richard
>> Sandiford via Gcc-patches
>> Sent: Tuesday, May 9, 2023 7:48 AM
>> To: gcc-patches@gcc.gnu.org
>> Cc: Richard Sandiford <Richard.Sandiford@arm.com>
>> Subject: [PATCH 2/6] aarch64: Allow moves after tied-register intrinsics
>>
>> Some ACLE intrinsics map to instructions that tie the output
>> operand to an input operand.  If all the operands are allocated
>> to different registers, and if MOVPRFX can't be used, we will need
>> a move either before the instruction or after it.  Many tests only
>> matched the "before" case; this patch makes them accept the "after"
>> case too.
>>
>> gcc/testsuite/
>>       * gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: Allow
>>       moves to occur after the intrinsic instruction, rather than requiring
>>       them to happen before.
>>       * gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c: Likewise.
>>       * gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c: Likewise.
>
> I'm seeing some dot-product intrinsics failures:
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O1   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O1   check-function-bodies ufooq_lane_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2   check-function-bodies ufooq_lane_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufooq_lane_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O3 -g   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O3 -g   check-function-bodies ufooq_lane_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Og -g   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Og -g   check-function-bodies ufooq_lane_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Os   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Os   check-function-bodies ufooq_lane_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O1   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O1   check-function-bodies ufooq_laneq_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2   check-function-bodies ufooq_laneq_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufooq_laneq_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O3 -g   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O3 -g   check-function-bodies ufooq_laneq_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Og -g   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Og -g   check-function-bodies ufooq_laneq_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Os   check-function-bodies ufoo_untied
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Os   check-function-bodies ufooq_laneq_untied

Ugh.  Big-endian.  Hadn't thought about that being an issue.
Was testing natively on little-endian aarch64-linux-gnu and
didn't see these.

> From a quick inspection it looks like it's just an alternative regalloc that moves the mov + dot instructions around, similar to what you fixed in bfdot-2.c and vdot-3-2.c.
> I guess they need a similar adjustment?

Yeah, will fix.

Thanks,
Richard
  
Kyrylo Tkachov May 15, 2023, 2:22 p.m. UTC | #3
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Monday, May 15, 2023 3:18 PM
> To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH 2/6] aarch64: Allow moves after tied-register intrinsics
> 
> Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> writes:
> > Hi Richard,
> >
> >> -----Original Message-----
> >> From: Gcc-patches <gcc-patches-
> >> bounces+kyrylo.tkachov=arm.com@gcc.gnu.org> On Behalf Of Richard
> >> Sandiford via Gcc-patches
> >> Sent: Tuesday, May 9, 2023 7:48 AM
> >> To: gcc-patches@gcc.gnu.org
> >> Cc: Richard Sandiford <Richard.Sandiford@arm.com>
> >> Subject: [PATCH 2/6] aarch64: Allow moves after tied-register intrinsics
> >>
> >> Some ACLE intrinsics map to instructions that tie the output
> >> operand to an input operand.  If all the operands are allocated
> >> to different registers, and if MOVPRFX can't be used, we will need
> >> a move either before the instruction or after it.  Many tests only
> >> matched the "before" case; this patch makes them accept the "after"
> >> case too.
> >>
> >> gcc/testsuite/
> >>       * gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: Allow
> >>       moves to occur after the intrinsic instruction, rather than requiring
> >>       them to happen before.
> >>       * gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c: Likewise.
> >>       * gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c: Likewise.
> >
> > I'm seeing some dot-product intrinsics failures:
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O1   check-function-
> bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O1   check-function-
> bodies ufooq_lane_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2   check-function-
> bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2   check-function-
> bodies ufooq_lane_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2 -flto -fno-use-
> linker-plugin -flto-partition=none   check-function-bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2 -flto -fno-use-
> linker-plugin -flto-partition=none   check-function-bodies ufooq_lane_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O3 -g   check-
> function-bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O3 -g   check-
> function-bodies ufooq_lane_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Og -g   check-
> function-bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Og -g   check-
> function-bodies ufooq_lane_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Os   check-function-
> bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Os   check-function-
> bodies ufooq_lane_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O1   check-
> function-bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O1   check-
> function-bodies ufooq_laneq_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2   check-
> function-bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2   check-
> function-bodies ufooq_laneq_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2 -flto -fno-use-
> linker-plugin -flto-partition=none   check-function-bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2 -flto -fno-use-
> linker-plugin -flto-partition=none   check-function-bodies
> ufooq_laneq_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O3 -g   check-
> function-bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O3 -g   check-
> function-bodies ufooq_laneq_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Og -g   check-
> function-bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Og -g   check-
> function-bodies ufooq_laneq_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Os   check-function-
> bodies ufoo_untied
> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Os   check-function-
> bodies ufooq_laneq_untied
> 
> Ugh.  Big-endian.  Hadn't thought about that being an issue.
> Was testing natively on little-endian aarch64-linux-gnu and
> didn't see these.

FWIW this is on a little-endian aarch64-none-elf configuration.
Maybe some defaults differ between bare-metal and Linux...

> 
> > From a quick inspection it looks like it's just an alternative regalloc that
> moves the mov + dot instructions around, similar to what you fixed in bfdot-
> 2.c and vdot-3-2.c.
> > I guess they need a similar adjustment?
> 
> Yeah, will fix.

Thanks!
Kyrill

> 
> Thanks,
> Richard
  
Richard Sandiford May 15, 2023, 2:25 p.m. UTC | #4
Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Monday, May 15, 2023 3:18 PM
>> To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
>> Cc: gcc-patches@gcc.gnu.org
>> Subject: Re: [PATCH 2/6] aarch64: Allow moves after tied-register intrinsics
>> 
>> Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> writes:
>> > Hi Richard,
>> >
>> >> -----Original Message-----
>> >> From: Gcc-patches <gcc-patches-
>> >> bounces+kyrylo.tkachov=arm.com@gcc.gnu.org> On Behalf Of Richard
>> >> Sandiford via Gcc-patches
>> >> Sent: Tuesday, May 9, 2023 7:48 AM
>> >> To: gcc-patches@gcc.gnu.org
>> >> Cc: Richard Sandiford <Richard.Sandiford@arm.com>
>> >> Subject: [PATCH 2/6] aarch64: Allow moves after tied-register intrinsics
>> >>
>> >> Some ACLE intrinsics map to instructions that tie the output
>> >> operand to an input operand.  If all the operands are allocated
>> >> to different registers, and if MOVPRFX can't be used, we will need
>> >> a move either before the instruction or after it.  Many tests only
>> >> matched the "before" case; this patch makes them accept the "after"
>> >> case too.
>> >>
>> >> gcc/testsuite/
>> >>       * gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: Allow
>> >>       moves to occur after the intrinsic instruction, rather than requiring
>> >>       them to happen before.
>> >>       * gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c: Likewise.
>> >>       * gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c: Likewise.
>> >
>> > I'm seeing some dot-product intrinsics failures:
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O1   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O1   check-function-bodies ufooq_lane_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2   check-function-bodies ufooq_lane_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufooq_lane_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O3 -g   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -O3 -g   check-function-bodies ufooq_lane_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Og -g   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Og -g   check-function-bodies ufooq_lane_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Os   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c   -Os   check-function-bodies ufooq_lane_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O1   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O1   check-function-bodies ufooq_laneq_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2   check-function-bodies ufooq_laneq_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none   check-function-bodies ufooq_laneq_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O3 -g   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -O3 -g   check-function-bodies ufooq_laneq_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Og -g   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Og -g   check-function-bodies ufooq_laneq_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Os   check-function-bodies ufoo_untied
>> > FAIL: gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c   -Os   check-function-bodies ufooq_laneq_untied
>> 
>> Ugh.  Big-endian.  Hadn't thought about that being an issue.
>> Was testing natively on little-endian aarch64-linux-gnu and
>> didn't see these.
>
> FWIW this is on a little-endian aarch64-none-elf configuration.

Yeah, but the tests force big-endian, and require a <stdint.h> that
supports big-endian.  Newlib supports both endiannesses, but a given
glibc installation doesn't.  So the tests will be exercised on *-elf
of any endianness, but will only be exercised on *-linux-gnu for
big-endian.
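
A minimal sketch of the kind of directives involved, assuming the usual
advsimd-intrinsics test conventions (the exact option set in bfdot-2.c
may differ):

/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-additional-options "-O2 -mbig-endian --save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */

With -mbig-endian in force, the test only builds where the installed
<stdint.h> supports that endianness.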

Richard
  

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
index 4b730e39d4e..1143bb797bc 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
@@ -9,8 +9,13 @@ 
 
 /*
 **test_bfcvtnq2_untied:
+** (
 **     mov	v0.16b, v1.16b
 **     bfcvtn2	v0.8h, v2.4s
+** |
+**     bfcvtn2	v1.8h, v2.4s
+**     mov	v0.16b, v1.16b
+** )
 **     ret
 */
 bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive,
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c
index ad51507731b..a5baf57cd5c 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c
@@ -69,8 +69,13 @@  float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y)
 
 /*
 **ufoo_untied:
+** (
 **	mov	v0.8b, v1.8b
 **	bfdot	v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h)
+** |
+**	bfdot	v1.2s, (v2.4h, v3.4h|v3.4h, v2.4h)
+**	mov	v0.8b, v1.8b
+** )
 **	ret
 */
 float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, bfloat16x4_t x, bfloat16x4_t y)
@@ -80,8 +85,13 @@  float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, bfloat16x4_t x, bfloa
 
 /*
 **ufooq_lane_untied:
+** (
 **	mov	v0.16b, v1.16b
 **	bfdot	v0.4s, v2.8h, v3.2h\[1\]
+** |
+**	bfdot	v1.4s, v2.8h, v3.2h\[1\]
+**	mov	v0.16b, v1.16b
+** )
 **	ret
 */
 float32x4_t ufooq_lane_untied(float32x4_t unused, float32x4_t r, bfloat16x8_t x, bfloat16x4_t y)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c
index ac4f821e771..a245b9f792a 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c
@@ -114,8 +114,13 @@  int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
 
 /*
 **ufoo_untied:
+** (
 **	mov	v0\.8b, v1\.8b
 **	usdot	v0\.2s, v2\.8b, v3\.8b
+** |
+**	usdot	v1\.2s, v2\.8b, v3\.8b
+**	mov	v0\.8b, v1\.8b
+** )
 **	ret
 */
 int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
@@ -125,8 +130,13 @@  int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
 
 /*
 **ufooq_laneq_untied:
+** (
 **	mov	v0\.16b, v1\.16b
 **	usdot	v0\.4s, v2\.16b, v3\.4b\[3\]
+** |
+**	usdot	v1\.4s, v2\.16b, v3\.4b\[3\]
+**	mov	v0\.16b, v1\.16b
+** )
 **	ret
 */
 int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
index 6c6bfa1c294..642c45ab492 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
@@ -13,8 +13,13 @@  TEST_FOLD_LEFT_D (adda_d0_f16, float16_t, svfloat16_t,
 
 /*
 ** adda_d1_f16:
+** (
 **	mov	v0\.h\[0\], v1\.h\[0\]
 **	fadda	h0, p0, h0, z2\.h
+** |
+**	fadda	h1, p0, h1, z2\.h
+**	mov	v0\.h\[0\], v1\.h\[0\]
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (adda_d1_f16, float16_t, svfloat16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c
index 8b2a1dd1c68..79bdd3d8048 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c
@@ -13,8 +13,13 @@  TEST_FOLD_LEFT_D (adda_d0_f32, float32_t, svfloat32_t,
 
 /*
 ** adda_d1_f32:
+** (
 **	fmov	s0, s1
 **	fadda	s0, p0, s0, z2\.s
+** |
+**	fadda	s1, p0, s1, z2\.s
+**	fmov	s0, s1
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (adda_d1_f32, float32_t, svfloat32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c
index 90a56420a6a..c8f56772218 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c
@@ -13,8 +13,13 @@  TEST_FOLD_LEFT_D (adda_d0_f64, float64_t, svfloat64_t,
 
 /*
 ** adda_d1_f64:
+** (
 **	fmov	d0, d1
 **	fadda	d0, p0, d0, z2\.d
+** |
+**	fadda	d1, p0, d1, z2\.d
+**	fmov	d0, d1
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (adda_d1_f64, float64_t, svfloat64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c
index 63426cf947d..7a20a22d128 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c
@@ -27,8 +27,13 @@  TEST_UNIFORM_P (brka_b_m_tied2,
 
 /*
 ** brka_b_m_untied:
+** (
 **	mov	p0\.b, p2\.b
 **	brka	p0\.b, p3/m, p1\.b
+** |
+**	brka	p2\.b, p3/m, p1\.b
+**	mov	p0\.b, p2\.b
+** )
 **	ret
 */
 TEST_UNIFORM_P (brka_b_m_untied,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c
index 4f9a2c2d7b9..f1c8c436863 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c
@@ -27,8 +27,13 @@  TEST_UNIFORM_P (brkb_b_m_tied2,
 
 /*
 ** brkb_b_m_untied:
+** (
 **	mov	p0\.b, p2\.b
 **	brkb	p0\.b, p3/m, p1\.b
+** |
+**	brkb	p2\.b, p3/m, p1\.b
+**	mov	p0\.b, p2\.b
+** )
 **	ret
 */
 TEST_UNIFORM_P (brkb_b_m_untied,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c
index 229a5fff9eb..69e8eb6b0e5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_P (brkn_b_z_tied2,
 
 /*
 ** brkn_b_z_untied:
+** (
 **	mov	p0\.b, p2\.b
 **	brkn	p0\.b, p3/z, p1\.b, p0\.b
+** |
+**	brkn	p2\.b, p3/z, p1\.b, p2\.b
+**	mov	p0\.b, p2\.b
+** )
 **	ret
 */
 TEST_UNIFORM_P (brkn_b_z_untied,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c
index a15e34400f6..54a1d1af178 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c
@@ -43,8 +43,13 @@  TEST_FOLD_LEFT_D (clasta_d0_bf16, bfloat16_t, svbfloat16_t,
 
 /*
 ** clasta_d1_bf16:
+** (
 **	mov	v0\.h\[0\], v1\.h\[0\]
 **	clasta	h0, p0, h0, z2\.h
+** |
+**	clasta	h1, p0, h1, z2\.h
+**	mov	v0\.h\[0\], v1\.h\[0\]
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (clasta_d1_bf16, bfloat16_t, svbfloat16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c
index d9a980f60c0..243cad40f56 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c
@@ -43,8 +43,13 @@  TEST_FOLD_LEFT_D (clasta_d0_f16, float16_t, svfloat16_t,
 
 /*
 ** clasta_d1_f16:
+** (
 **	mov	v0\.h\[0\], v1\.h\[0\]
 **	clasta	h0, p0, h0, z2\.h
+** |
+**	clasta	h1, p0, h1, z2\.h
+**	mov	v0\.h\[0\], v1\.h\[0\]
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (clasta_d1_f16, float16_t, svfloat16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c
index cac01fa6d64..44e700ada9a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c
@@ -43,8 +43,13 @@  TEST_FOLD_LEFT_D (clasta_d0_f32, float32_t, svfloat32_t,
 
 /*
 ** clasta_d1_f32:
+** (
 **	fmov	s0, s1
 **	clasta	s0, p0, s0, z2\.s
+** |
+**	clasta	s1, p0, s1, z2\.s
+**	fmov	s0, s1
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (clasta_d1_f32, float32_t, svfloat32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c
index 43b93553ba8..fb147d51f0c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c
@@ -43,8 +43,13 @@  TEST_FOLD_LEFT_D (clasta_d0_f64, float64_t, svfloat64_t,
 
 /*
 ** clasta_d1_f64:
+** (
 **	fmov	d0, d1
 **	clasta	d0, p0, d0, z2\.d
+** |
+**	clasta	d1, p0, d1, z2\.d
+**	fmov	d0, d1
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (clasta_d1_f64, float64_t, svfloat64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c
index 235fd1b4ed6..8dcb9a152b5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c
@@ -43,8 +43,13 @@  TEST_FOLD_LEFT_D (clastb_d0_bf16, bfloat16_t, svbfloat16_t,
 
 /*
 ** clastb_d1_bf16:
+** (
 **	mov	v0\.h\[0\], v1\.h\[0\]
 **	clastb	h0, p0, h0, z2\.h
+** |
+**	clastb	h1, p0, h1, z2\.h
+**	mov	v0\.h\[0\], v1\.h\[0\]
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (clastb_d1_bf16, bfloat16_t, svbfloat16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c
index e56d7688a1c..08e63cee9e8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c
@@ -43,8 +43,13 @@  TEST_FOLD_LEFT_D (clastb_d0_f16, float16_t, svfloat16_t,
 
 /*
 ** clastb_d1_f16:
+** (
 **	mov	v0\.h\[0\], v1\.h\[0\]
 **	clastb	h0, p0, h0, z2\.h
+** |
+**	clastb	h1, p0, h1, z2\.h
+**	mov	v0\.h\[0\], v1\.h\[0\]
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (clastb_d1_f16, float16_t, svfloat16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c
index c580d13064b..8d71344b2ce 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c
@@ -43,8 +43,13 @@  TEST_FOLD_LEFT_D (clastb_d0_f32, float32_t, svfloat32_t,
 
 /*
 ** clastb_d1_f32:
+** (
 **	fmov	s0, s1
 **	clastb	s0, p0, s0, z2\.s
+** |
+**	clastb	s1, p0, s1, z2\.s
+**	fmov	s0, s1
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (clastb_d1_f32, float32_t, svfloat32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c
index 217a76f5112..6b24dcad17e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c
@@ -43,8 +43,13 @@  TEST_FOLD_LEFT_D (clastb_d0_f64, float64_t, svfloat64_t,
 
 /*
 ** clastb_d1_f64:
+** (
 **	fmov	d0, d1
 **	clastb	d0, p0, d0, z2\.d
+** |
+**	clastb	d1, p0, d1, z2\.d
+**	fmov	d0, d1
+** )
 **	ret
 */
 TEST_FOLD_LEFT_D (clastb_d1_f64, float64_t, svfloat64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c
index a32099656cc..9ec5f00c7f1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c
@@ -13,8 +13,13 @@  TEST_UNIFORM_P (pfirst_b_tied1,
 
 /*
 ** pfirst_b_untied:
+** (
 **	mov	p0\.b, p1\.b
 **	pfirst	p0\.b, p3, p0\.b
+** |
+**	pfirst	p1\.b, p3, p1\.b
+**	mov	p0\.b, p1\.b
+** )
 **	ret
 */
 TEST_UNIFORM_P (pfirst_b_untied,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c
index ad0efe5e711..efb76e8ba8a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c
@@ -13,8 +13,13 @@  TEST_UNIFORM_P (pnext_b16_tied1,
 
 /*
 ** pnext_b16_untied:
+** (
 **	mov	p0\.b, p1\.b
 **	pnext	p0\.h, p3, p0\.h
+** |
+**	pnext	p1\.h, p3, p1\.h
+**	mov	p0\.b, p1\.b
+** )
 **	ret
 */
 TEST_UNIFORM_P (pnext_b16_untied,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c
index a0030fae18d..1f57253fbf1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c
@@ -13,8 +13,13 @@  TEST_UNIFORM_P (pnext_b32_tied1,
 
 /*
 ** pnext_b32_untied:
+** (
 **	mov	p0\.b, p1\.b
 **	pnext	p0\.s, p3, p0\.s
+** |
+**	pnext	p1\.s, p3, p1\.s
+**	mov	p0\.b, p1\.b
+** )
 **	ret
 */
 TEST_UNIFORM_P (pnext_b32_untied,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c
index 59db2f04f2a..eed5a56f134 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c
@@ -13,8 +13,13 @@  TEST_UNIFORM_P (pnext_b64_tied1,
 
 /*
 ** pnext_b64_untied:
+** (
 **	mov	p0\.b, p1\.b
 **	pnext	p0\.d, p3, p0\.d
+** |
+**	pnext	p1\.d, p3, p1\.d
+**	mov	p0\.b, p1\.b
+** )
 **	ret
 */
 TEST_UNIFORM_P (pnext_b64_untied,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c
index cfc2e907c25..a36d43c4cff 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c
@@ -13,8 +13,13 @@  TEST_UNIFORM_P (pnext_b8_tied1,
 
 /*
 ** pnext_b8_untied:
+** (
 **	mov	p0\.b, p1\.b
 **	pnext	p0\.b, p3, p0\.b
+** |
+**	pnext	p1\.b, p3, p1\.b
+**	mov	p0\.b, p1\.b
+** )
 **	ret
 */
 TEST_UNIFORM_P (pnext_b8_untied,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s16.c
index 6772a5620d2..d91d499da5a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s16.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sli_0_s16_tied2, svint16_t,
 
 /*
 ** sli_0_s16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.h, z2\.h, #0
+** |
+**	sli	z1\.h, z2\.h, #0
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_0_s16_untied, svint16_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sli_1_s16_tied2, svint16_t,
 
 /*
 ** sli_1_s16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.h, z2\.h, #1
+** |
+**	sli	z1\.h, z2\.h, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_1_s16_untied, svint16_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sli_15_s16_tied2, svint16_t,
 
 /*
 ** sli_15_s16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.h, z2\.h, #15
+** |
+**	sli	z1\.h, z2\.h, #15
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_15_s16_untied, svint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s32.c
index 023e7c40d3e..3ae507c432c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s32.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sli_0_s32_tied2, svint32_t,
 
 /*
 ** sli_0_s32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.s, z2\.s, #0
+** |
+**	sli	z1\.s, z2\.s, #0
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_0_s32_untied, svint32_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sli_1_s32_tied2, svint32_t,
 
 /*
 ** sli_1_s32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.s, z2\.s, #1
+** |
+**	sli	z1\.s, z2\.s, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_1_s32_untied, svint32_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sli_31_s32_tied2, svint32_t,
 
 /*
 ** sli_31_s32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.s, z2\.s, #31
+** |
+**	sli	z1\.s, z2\.s, #31
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_31_s32_untied, svint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s64.c
index c37db1b4796..93c5723a804 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s64.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sli_0_s64_tied2, svint64_t,
 
 /*
 ** sli_0_s64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.d, z2\.d, #0
+** |
+**	sli	z1\.d, z2\.d, #0
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_0_s64_untied, svint64_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sli_1_s64_tied2, svint64_t,
 
 /*
 ** sli_1_s64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.d, z2\.d, #1
+** |
+**	sli	z1\.d, z2\.d, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_1_s64_untied, svint64_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sli_63_s64_tied2, svint64_t,
 
 /*
 ** sli_63_s64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.d, z2\.d, #63
+** |
+**	sli	z1\.d, z2\.d, #63
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_63_s64_untied, svint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s8.c
index ea0dcdc1871..5ac336f76d1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_s8.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sli_0_s8_tied2, svint8_t,
 
 /*
 ** sli_0_s8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.b, z2\.b, #0
+** |
+**	sli	z1\.b, z2\.b, #0
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_0_s8_untied, svint8_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sli_1_s8_tied2, svint8_t,
 
 /*
 ** sli_1_s8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.b, z2\.b, #1
+** |
+**	sli	z1\.b, z2\.b, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_1_s8_untied, svint8_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sli_7_s8_tied2, svint8_t,
 
 /*
 ** sli_7_s8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.b, z2\.b, #7
+** |
+**	sli	z1\.b, z2\.b, #7
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_7_s8_untied, svint8_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u16.c
index 475c00ea6a4..b6cbb55b8a0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u16.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sli_0_u16_tied2, svuint16_t,
 
 /*
 ** sli_0_u16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.h, z2\.h, #0
+** |
+**	sli	z1\.h, z2\.h, #0
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_0_u16_untied, svuint16_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sli_1_u16_tied2, svuint16_t,
 
 /*
 ** sli_1_u16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.h, z2\.h, #1
+** |
+**	sli	z1\.h, z2\.h, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_1_u16_untied, svuint16_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sli_15_u16_tied2, svuint16_t,
 
 /*
 ** sli_15_u16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.h, z2\.h, #15
+** |
+**	sli	z1\.h, z2\.h, #15
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_15_u16_untied, svuint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u32.c
index 52bd8370e5f..654f4b6d670 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u32.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sli_0_u32_tied2, svuint32_t,
 
 /*
 ** sli_0_u32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.s, z2\.s, #0
+** |
+**	sli	z1\.s, z2\.s, #0
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_0_u32_untied, svuint32_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sli_1_u32_tied2, svuint32_t,
 
 /*
 ** sli_1_u32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.s, z2\.s, #1
+** |
+**	sli	z1\.s, z2\.s, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_1_u32_untied, svuint32_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sli_31_u32_tied2, svuint32_t,
 
 /*
 ** sli_31_u32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.s, z2\.s, #31
+** |
+**	sli	z1\.s, z2\.s, #31
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_31_u32_untied, svuint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u64.c
index ab75ba2e6d5..c5466a295cc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u64.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sli_0_u64_tied2, svuint64_t,
 
 /*
 ** sli_0_u64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.d, z2\.d, #0
+** |
+**	sli	z1\.d, z2\.d, #0
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_0_u64_untied, svuint64_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sli_1_u64_tied2, svuint64_t,
 
 /*
 ** sli_1_u64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.d, z2\.d, #1
+** |
+**	sli	z1\.d, z2\.d, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_1_u64_untied, svuint64_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sli_63_u64_tied2, svuint64_t,
 
 /*
 ** sli_63_u64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.d, z2\.d, #63
+** |
+**	sli	z1\.d, z2\.d, #63
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_63_u64_untied, svuint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u8.c
index e2207c3c466..2b3533f5e71 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sli_u8.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sli_0_u8_tied2, svuint8_t,
 
 /*
 ** sli_0_u8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.b, z2\.b, #0
+** |
+**	sli	z1\.b, z2\.b, #0
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_0_u8_untied, svuint8_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sli_1_u8_tied2, svuint8_t,
 
 /*
 ** sli_1_u8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.b, z2\.b, #1
+** |
+**	sli	z1\.b, z2\.b, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_1_u8_untied, svuint8_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sli_7_u8_tied2, svuint8_t,
 
 /*
 ** sli_7_u8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sli	z0\.b, z2\.b, #7
+** |
+**	sli	z1\.b, z2\.b, #7
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sli_7_u8_untied, svuint8_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s16.c
index 177fbb20d62..16cb73ce542 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s16.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sri_1_s16_tied2, svint16_t,
 
 /*
 ** sri_1_s16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.h, z2\.h, #1
+** |
+**	sri	z1\.h, z2\.h, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_1_s16_untied, svint16_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sri_2_s16_tied2, svint16_t,
 
 /*
 ** sri_2_s16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.h, z2\.h, #2
+** |
+**	sri	z1\.h, z2\.h, #2
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_2_s16_untied, svint16_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sri_16_s16_tied2, svint16_t,
 
 /*
 ** sri_16_s16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.h, z2\.h, #16
+** |
+**	sri	z1\.h, z2\.h, #16
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_16_s16_untied, svint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s32.c
index 27d6c99c3a0..3c69f622d74 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s32.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sri_1_s32_tied2, svint32_t,
 
 /*
 ** sri_1_s32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.s, z2\.s, #1
+** |
+**	sri	z1\.s, z2\.s, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_1_s32_untied, svint32_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sri_2_s32_tied2, svint32_t,
 
 /*
 ** sri_2_s32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.s, z2\.s, #2
+** |
+**	sri	z1\.s, z2\.s, #2
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_2_s32_untied, svint32_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sri_32_s32_tied2, svint32_t,
 
 /*
 ** sri_32_s32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.s, z2\.s, #32
+** |
+**	sri	z1\.s, z2\.s, #32
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_32_s32_untied, svint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s64.c
index 021613d0179..5c64e1bb51f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s64.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sri_1_s64_tied2, svint64_t,
 
 /*
 ** sri_1_s64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.d, z2\.d, #1
+** |
+**	sri	z1\.d, z2\.d, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_1_s64_untied, svint64_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sri_2_s64_tied2, svint64_t,
 
 /*
 ** sri_2_s64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.d, z2\.d, #2
+** |
+**	sri	z1\.d, z2\.d, #2
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_2_s64_untied, svint64_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sri_64_s64_tied2, svint64_t,
 
 /*
 ** sri_64_s64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.d, z2\.d, #64
+** |
+**	sri	z1\.d, z2\.d, #64
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_64_s64_untied, svint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s8.c
index 0bfa2678559..1871bb47645 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_s8.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sri_1_s8_tied2, svint8_t,
 
 /*
 ** sri_1_s8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.b, z2\.b, #1
+** |
+**	sri	z1\.b, z2\.b, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_1_s8_untied, svint8_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sri_2_s8_tied2, svint8_t,
 
 /*
 ** sri_2_s8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.b, z2\.b, #2
+** |
+**	sri	z1\.b, z2\.b, #2
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_2_s8_untied, svint8_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sri_8_s8_tied2, svint8_t,
 
 /*
 ** sri_8_s8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.b, z2\.b, #8
+** |
+**	sri	z1\.b, z2\.b, #8
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_8_s8_untied, svint8_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u16.c
index 2f12dc90857..ce6e838f7db 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u16.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sri_1_u16_tied2, svuint16_t,
 
 /*
 ** sri_1_u16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.h, z2\.h, #1
+** |
+**	sri	z1\.h, z2\.h, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_1_u16_untied, svuint16_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sri_2_u16_tied2, svuint16_t,
 
 /*
 ** sri_2_u16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.h, z2\.h, #2
+** |
+**	sri	z1\.h, z2\.h, #2
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_2_u16_untied, svuint16_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sri_16_u16_tied2, svuint16_t,
 
 /*
 ** sri_16_u16_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.h, z2\.h, #16
+** |
+**	sri	z1\.h, z2\.h, #16
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_16_u16_untied, svuint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u32.c
index d4d107f55cc..7cf6fea771b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u32.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sri_1_u32_tied2, svuint32_t,
 
 /*
 ** sri_1_u32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.s, z2\.s, #1
+** |
+**	sri	z1\.s, z2\.s, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_1_u32_untied, svuint32_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sri_2_u32_tied2, svuint32_t,
 
 /*
 ** sri_2_u32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.s, z2\.s, #2
+** |
+**	sri	z1\.s, z2\.s, #2
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_2_u32_untied, svuint32_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sri_32_u32_tied2, svuint32_t,
 
 /*
 ** sri_32_u32_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.s, z2\.s, #32
+** |
+**	sri	z1\.s, z2\.s, #32
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_32_u32_untied, svuint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u64.c
index 41d67346f25..be61f85f265 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u64.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sri_1_u64_tied2, svuint64_t,
 
 /*
 ** sri_1_u64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.d, z2\.d, #1
+** |
+**	sri	z1\.d, z2\.d, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_1_u64_untied, svuint64_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sri_2_u64_tied2, svuint64_t,
 
 /*
 ** sri_2_u64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.d, z2\.d, #2
+** |
+**	sri	z1\.d, z2\.d, #2
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_2_u64_untied, svuint64_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sri_64_u64_tied2, svuint64_t,
 
 /*
 ** sri_64_u64_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.d, z2\.d, #64
+** |
+**	sri	z1\.d, z2\.d, #64
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_64_u64_untied, svuint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u8.c
index 0aa6a543860..84de5a2b2e9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sri_u8.c
@@ -18,8 +18,13 @@  TEST_UNIFORM_Z (sri_1_u8_tied2, svuint8_t,
 
 /*
 ** sri_1_u8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.b, z2\.b, #1
+** |
+**	sri	z1\.b, z2\.b, #1
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_1_u8_untied, svuint8_t,
@@ -42,8 +47,13 @@  TEST_UNIFORM_Z (sri_2_u8_tied2, svuint8_t,
 
 /*
 ** sri_2_u8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.b, z2\.b, #2
+** |
+**	sri	z1\.b, z2\.b, #2
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_2_u8_untied, svuint8_t,
@@ -66,8 +76,13 @@  TEST_UNIFORM_Z (sri_8_u8_tied2, svuint8_t,
 
 /*
 ** sri_8_u8_untied:
+** (
 **	mov	z0\.d, z1\.d
 **	sri	z0\.b, z2\.b, #8
+** |
+**	sri	z1\.b, z2\.b, #8
+**	mov	z0\.d, z1\.d
+** )
 **	ret
 */
 TEST_UNIFORM_Z (sri_8_u8_untied, svuint8_t,