testsuite: [arm] Relax expected register names in MVE tests

Message ID 20220929145727.269135-1-christophe.lyon@arm.com
State Accepted, archived
Headers
Series testsuite: [arm] Relax expected register names in MVE tests |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Christophe Lyon Sept. 29, 2022, 2:57 p.m. UTC
  These two tests hardcode q0 as the destination/source of load/store
instructions, but q0 is actually used only under
-mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
(e.g. q3) can be used to transfer function arguments from core
registers to MVE registers, making the expected regexp fail.

This small patch replaces q0 with q[0-7] to accept any 'q' register.

OK for trunk?

Thanks,

Christophe

gcc/testsuite/ChangeLog:

	* gcc.target/arm/mve/mve_load_memory_modes.c: Update expected
	registers.
	* gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.
---
 .../arm/mve/mve_load_memory_modes.c           | 58 +++++++++----------
 .../arm/mve/mve_store_memory_modes.c          | 58 +++++++++----------
 2 files changed, 58 insertions(+), 58 deletions(-)
  

Comments

Kyrylo Tkachov Sept. 29, 2022, 3:01 p.m. UTC | #1
Hi Christophe,

> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Thursday, September 29, 2022 3:57 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Andre Simoes Dias Vieira
> <Andre.SimoesDiasVieira@arm.com>; Christophe Lyon
> <Christophe.Lyon@arm.com>
> Subject: [PATCH] testsuite: [arm] Relax expected register names in MVE tests
> 
> These two tests have hardcoded q0 as destination/source of load/store
> instructions, but it is actually used only under
> -mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
> (eg. q3) can be used to transfer function arguments from core
> registers to MVE registers, making the expected regexp fail.
> 
> This small patch replaces q0 with q[0-7] to accept any 'q' register.
> 
> OK for trunk?
> 
> Thanks,
> 
> Christophe
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/arm/mve/mve_load_memory_modes.c: Update
> expected
> 	registers.
> 	* gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.
> ---
>  .../arm/mve/mve_load_memory_modes.c           | 58 +++++++++----------
>  .../arm/mve/mve_store_memory_modes.c          | 58 +++++++++----------
>  2 files changed, 58 insertions(+), 58 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> index e35eb1108aa..fa05fdcefec 100644
> --- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> +++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> @@ -7,7 +7,7 @@
>  /*
>  **off_load8_0:
>  **	...
> -**	vldrb.8	q0, \[r0, #16\]
> +**	vldrb.8	q[0-7], \[r0, #16\]
>  **	...
>  */
>  int8x16_t off_load8_0 (int8_t * a)
> @@ -18,7 +18,7 @@ int8x16_t off_load8_0 (int8_t * a)
>  /*
>  **off_load8_1:
>  **	...
> -**	vldrb.u16	q0, \[r0, #1\]
> +**	vldrb.u16	q[0-7], \[r0, #1\]
>  **	...
>  */
>  uint16x8_t off_load8_1 (uint8_t * a)
> @@ -29,7 +29,7 @@ uint16x8_t off_load8_1 (uint8_t * a)
>  /*
>  **off_load8_2:
>  **	...
> -**	vldrb.s32	q0, \[r0, #127\]
> +**	vldrb.s32	q[0-7], \[r0, #127\]
>  **	...
>  */
>  int32x4_t off_load8_2 (int8_t * a)
> @@ -40,7 +40,7 @@ int32x4_t off_load8_2 (int8_t * a)
>  /*
>  **off_load8_3:
>  **	...
> -**	vldrb.8	q0, \[r0, #-127\]
> +**	vldrb.8	q[0-7], \[r0, #-127\]
>  **	...
>  */
>  uint8x16_t off_load8_3 (uint8_t * a)
> @@ -51,7 +51,7 @@ uint8x16_t off_load8_3 (uint8_t * a)
>  /*
>  **not_off_load8_0:
>  **	...
> -**	vldrb.8	q0, \[r[0-9]+\]
> +**	vldrb.8	q[0-7], \[r[0-7]+\]
>  **	...
>  */
>  int8x16_t not_off_load8_0 (int8_t * a)
> @@ -62,7 +62,7 @@ int8x16_t not_off_load8_0 (int8_t * a)
>  /*
>  **off_loadfp16_0:
>  **	...
> -**	vldrh.16	q0, \[r0, #-244\]
> +**	vldrh.16	q[0-7], \[r0, #-244\]
>  **	...
>  */
>  float16x8_t off_loadfp16_0 (float16_t *a)
> @@ -73,7 +73,7 @@ float16x8_t off_loadfp16_0 (float16_t *a)
>  /*
>  **off_load16_0:
>  **	...
> -**	vldrh.16	q0, \[r0, #-2\]
> +**	vldrh.16	q[0-7], \[r0, #-2\]
>  **	...
>  */
>  uint16x8_t off_load16_0 (uint16_t * a)
> @@ -84,7 +84,7 @@ uint16x8_t off_load16_0 (uint16_t * a)
>  /*
>  **off_load16_1:
>  **	...
> -**	vldrh.u32	q0, \[r0, #254\]
> +**	vldrh.u32	q[0-7], \[r0, #254\]
>  **	...
>  */
>  uint32x4_t off_load16_1 (uint16_t * a)
> @@ -95,7 +95,7 @@ uint32x4_t off_load16_1 (uint16_t * a)
>  /*
>  **not_off_load16_0:
>  **	...
> -**	vldrh.16	q0, \[r[0-9]+\]
> +**	vldrh.16	q[0-7], \[r[0-7]+\]
>  **	...
>  */
>  int16x8_t not_off_load16_0 (int8_t * a)
> @@ -106,7 +106,7 @@ int16x8_t not_off_load16_0 (int8_t * a)
>  /*
>  **not_off_load16_1:
>  **	...
> -**	vldrh.u32	q0, \[r[0-9]+\]
> +**	vldrh.u32	q[0-7], \[r[0-7]+\]
>  **	...
>  */
>  uint32x4_t not_off_load16_1 (uint16_t * a)
> @@ -117,7 +117,7 @@ uint32x4_t not_off_load16_1 (uint16_t * a)
>  /*
>  **off_loadfp32_0:
>  **	...
> -**	vldrw.32	q0, \[r0, #24\]
> +**	vldrw.32	q[0-7], \[r0, #24\]
>  **	...
>  */
>  float32x4_t off_loadfp32_0 (float32_t *a)
> @@ -128,7 +128,7 @@ float32x4_t off_loadfp32_0 (float32_t *a)
>  /*
>  **off_load32_0:
>  **	...
> -**	vldrw.32	q0, \[r0, #4\]
> +**	vldrw.32	q[0-7], \[r0, #4\]
>  **	...
>  */
>  uint32x4_t off_load32_0 (uint32_t * a)
> @@ -139,7 +139,7 @@ uint32x4_t off_load32_0 (uint32_t * a)
>  /*
>  **off_load32_1:
>  **	...
> -**	vldrw.32	q0, \[r0, #-508\]
> +**	vldrw.32	q[0-7], \[r0, #-508\]
>  **	...
>  */

These make sense....

>  int32x4_t off_load32_1 (int32_t * a)
> @@ -149,7 +149,7 @@ int32x4_t off_load32_1 (int32_t * a)
>  /*
>  **pre_load8_0:
>  **	...
> -**	vldrb.8	q[0-9]+, \[r0, #16\]!
> +**	vldrb.8	q[0-7]+, \[r0, #16\]!
>  **	...
>  */


... but what is the reason for these changes?
Thanks,
Kyrill

>  int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
> @@ -162,7 +162,7 @@ int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
>  /*
>  **pre_load8_1:
>  **	...
> -**	vldrb.u16	q[0-9]+, \[r0, #4\]!
> +**	vldrb.u16	q[0-7]+, \[r0, #4\]!
>  **	...
>  */
>  uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
> @@ -175,7 +175,7 @@ uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
>  /*
>  **pre_loadfp16_0:
>  **	...
> -**	vldrh.16	q[0-9]+, \[r0, #128\]!
> +**	vldrh.16	q[0-7]+, \[r0, #128\]!
>  **	...
>  */
>  float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t *v)
> @@ -188,7 +188,7 @@ float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t
> *v)
>  /*
>  **pre_load16_0:
>  **	...
> -**	vldrh.16	q[0-9]+, \[r0, #-254\]!
> +**	vldrh.16	q[0-7]+, \[r0, #-254\]!
>  **	...
>  */
>  int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
> @@ -201,7 +201,7 @@ int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
>  /*
>  **pre_load16_1:
>  **	...
> -**	vldrh.s32	q[0-9]+, \[r0, #52\]!
> +**	vldrh.s32	q[0-7]+, \[r0, #52\]!
>  **	...
>  */
>  int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
> @@ -214,7 +214,7 @@ int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
>  /*
>  **pre_loadfp32_0:
>  **	...
> -**	vldrw.32	q[0-9]+, \[r0, #-72\]!
> +**	vldrw.32	q[0-7]+, \[r0, #-72\]!
>  **	...
>  */
>  float32_t* pre_loadfp32_0 (float32_t *a, float32x4_t *v)
> @@ -228,7 +228,7 @@ float32_t* pre_loadfp32_0 (float32_t *a, float32x4_t
> *v)
>  /*
>  **pre_load32_0:
>  **	...
> -**	vldrw.32	q[0-9]+, \[r0, #-4\]!
> +**	vldrw.32	q[0-7]+, \[r0, #-4\]!
>  **	...
>  */
>  uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t *v)
> @@ -242,7 +242,7 @@ uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t *v)
>  /*
>  **post_load8_0:
>  **	...
> -**	vldrb.8	q[0-9]+, \[r0\], #26
> +**	vldrb.8	q[0-7]+, \[r0\], #26
>  **	...
>  */
>  uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
> @@ -255,7 +255,7 @@ uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
>  /*
>  **post_load8_1:
>  **	...
> -**	vldrb.s16	q[0-9]+, \[r0\], #-1
> +**	vldrb.s16	q[0-7]+, \[r0\], #-1
>  **	...
>  */
>  int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
> @@ -268,7 +268,7 @@ int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
>  /*
>  **post_load8_2:
>  **	...
> -**	vldrb.8	q[0-9]+, \[r0\], #26
> +**	vldrb.8	q[0-7]+, \[r0\], #26
>  **	...
>  */
>  uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
> @@ -281,7 +281,7 @@ uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
>  /*
>  **post_load8_3:
>  **	...
> -**	vldrb.s16	q[0-9]+, \[r0\], #-1
> +**	vldrb.s16	q[0-7]+, \[r0\], #-1
>  **	...
>  */
>  int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
> @@ -294,7 +294,7 @@ int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
>  /*
>  **post_loadfp16_0:
>  **	...
> -**	vldrh.16	q[0-9]+, \[r0\], #-24
> +**	vldrh.16	q[0-7]+, \[r0\], #-24
>  **	...
>  */
>  float16_t* post_loadfp16_0 (float16_t *a, float16x8_t *v)
> @@ -307,7 +307,7 @@ float16_t* post_loadfp16_0 (float16_t *a, float16x8_t
> *v)
>  /*
>  **post_load16_0:
>  **	...
> -**	vldrh.16	q[0-9]+, \[r0\], #-126
> +**	vldrh.16	q[0-7]+, \[r0\], #-126
>  **	...
>  */
>  uint16_t* post_load16_0 (uint16_t * a, uint16x8_t *v)
> @@ -320,7 +320,7 @@ uint16_t* post_load16_0 (uint16_t * a, uint16x8_t
> *v)
>  /*
>  **post_load16_1:
>  **	...
> -**	vldrh.u32	q[0-9]+, \[r0\], #16
> +**	vldrh.u32	q[0-7]+, \[r0\], #16
>  **	...
>  */
>  uint16_t* post_load16_1 (uint16_t * a, uint32x4_t *v)
> @@ -333,7 +333,7 @@ uint16_t* post_load16_1 (uint16_t * a, uint32x4_t
> *v)
>  /*
>  **post_loadfp32_0:
>  **	...
> -**	vldrw.32	q[0-9]+, \[r0\], #4
> +**	vldrw.32	q[0-7]+, \[r0\], #4
>  **	...
>  */
>  float32_t* post_loadfp32_0 (float32_t *a, float32x4_t *v)
> @@ -346,7 +346,7 @@ float32_t* post_loadfp32_0 (float32_t *a, float32x4_t
> *v)
>  /*
>  **post_load32_0:
>  **	...
> -**	vldrw.32	q[0-9]+, \[r0\], #-16
> +**	vldrw.32	q[0-7]+, \[r0\], #-16
>  **	...
>  */
>  int32_t* post_load32_0 (int32_t * a, int32x4_t *v)
> diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
> b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
> index 632f5b44f0b..2fc62916657 100644
> --- a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
> +++ b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
> @@ -7,7 +7,7 @@
>  /*
>  **off_store8_0:
>  **	...
> -**	vstrb.8	q0, \[r0, #16\]
> +**	vstrb.8	q[0-7], \[r0, #16\]
>  **	...
>  */
>  uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
> @@ -19,7 +19,7 @@ uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
>  /*
>  **off_store8_1:
>  **	...
> -**	vstrb.16	q0, \[r0, #-1\]
> +**	vstrb.16	q[0-7], \[r0, #-1\]
>  **	...
>  */
>  int8_t *off_store8_1 (int8_t * a, int16x8_t v)
> @@ -31,7 +31,7 @@ int8_t *off_store8_1 (int8_t * a, int16x8_t v)
>  /*
>  **off_store8_2:
>  **	...
> -**	vstrb.32	q0, \[r0, #-127\]
> +**	vstrb.32	q[0-7], \[r0, #-127\]
>  **	...
>  */
>  uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
> @@ -43,7 +43,7 @@ uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
>  /*
>  **off_store8_3:
>  **	...
> -**	vstrb.8	q0, \[r0, #127\]
> +**	vstrb.8	q[0-7], \[r0, #127\]
>  **	...
>  */
>  int8_t *off_store8_3 (int8_t * a, int8x16_t v)
> @@ -55,7 +55,7 @@ int8_t *off_store8_3 (int8_t * a, int8x16_t v)
>  /*
>  **not_off_store8_0:
>  **	...
> -**	vstrb.8	q0, \[r[0-9]+\]
> +**	vstrb.8	q[0-7], \[r[0-7]+\]
>  **	...
>  */
>  uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
> @@ -67,7 +67,7 @@ uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
>  /*
>  **off_storefp16_0:
>  **	...
> -**	vstrh.16	q0, \[r0, #250\]
> +**	vstrh.16	q[0-7], \[r0, #250\]
>  **	...
>  */
>  float16_t *off_storefp16_0 (float16_t *a, float16x8_t v)
> @@ -79,7 +79,7 @@ float16_t *off_storefp16_0 (float16_t *a, float16x8_t v)
>  /*
>  **off_store16_0:
>  **	...
> -**	vstrh.16	q0, \[r0, #4\]
> +**	vstrh.16	q[0-7], \[r0, #4\]
>  **	...
>  */
>  int16_t *off_store16_0 (int16_t * a, int16x8_t v)
> @@ -91,7 +91,7 @@ int16_t *off_store16_0 (int16_t * a, int16x8_t v)
>  /*
>  **off_store16_1:
>  **	...
> -**	vstrh.32	q0, \[r0, #-254\]
> +**	vstrh.32	q[0-7], \[r0, #-254\]
>  **	...
>  */
>  int16_t *off_store16_1 (int16_t * a, int32x4_t v)
> @@ -103,7 +103,7 @@ int16_t *off_store16_1 (int16_t * a, int32x4_t v)
>  /*
>  **not_off_store16_0:
>  **	...
> -**	vstrh.16	q0, \[r[0-9]+\]
> +**	vstrh.16	q[0-7], \[r[0-7]+\]
>  **	...
>  */
>  uint8_t *not_off_store16_0 (uint8_t * a, uint16x8_t v)
> @@ -115,7 +115,7 @@ uint8_t *not_off_store16_0 (uint8_t * a, uint16x8_t
> v)
>  /*
>  **not_off_store16_1:
>  **	...
> -**	vstrh.32	q0, \[r[0-9]+\]
> +**	vstrh.32	q[0-7], \[r[0-7]+\]
>  **	...
>  */
>  int16_t *not_off_store16_1 (int16_t * a, int32x4_t v)
> @@ -127,7 +127,7 @@ int16_t *not_off_store16_1 (int16_t * a, int32x4_t v)
>  /*
>  **off_storefp32_0:
>  **	...
> -**	vstrw.32	q0, \[r0, #-412\]
> +**	vstrw.32	q[0-7], \[r0, #-412\]
>  **	...
>  */
>  float32_t *off_storefp32_0 (float32_t *a, float32x4_t v)
> @@ -139,7 +139,7 @@ float32_t *off_storefp32_0 (float32_t *a, float32x4_t
> v)
>  /*
>  **off_store32_0:
>  **	...
> -**	vstrw.32	q0, \[r0, #-4\]
> +**	vstrw.32	q[0-7], \[r0, #-4\]
>  **	...
>  */
>  int32_t *off_store32_0 (int32_t * a, int32x4_t v)
> @@ -151,7 +151,7 @@ int32_t *off_store32_0 (int32_t * a, int32x4_t v)
>  /*
>  **off_store32_1:
>  **	...
> -**	vstrw.32	q0, \[r0, #508\]
> +**	vstrw.32	q[0-7], \[r0, #508\]
>  **	...
>  */
>  uint32_t *off_store32_1 (uint32_t * a, uint32x4_t v)
> @@ -163,7 +163,7 @@ uint32_t *off_store32_1 (uint32_t * a, uint32x4_t v)
>  /*
>  **pre_store8_0:
>  **	...
> -**	vstrb.8	q[0-9]+, \[r0, #-16\]!
> +**	vstrb.8	q[0-7]+, \[r0, #-16\]!
>  **	...
>  */
>  uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
> @@ -176,7 +176,7 @@ uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
>  /*
>  **pre_store8_1:
>  **	...
> -**	vstrb.16	q[0-9]+, \[r0, #4\]!
> +**	vstrb.16	q[0-7]+, \[r0, #4\]!
>  **	...
>  */
>  int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
> @@ -189,7 +189,7 @@ int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
>  /*
>  **pre_storefp16_0:
>  **	...
> -**	vstrh.16	q0, \[r0, #8\]!
> +**	vstrh.16	q[0-7], \[r0, #8\]!
>  **	...
>  */
>  float16_t *pre_storefp16_0 (float16_t *a, float16x8_t v)
> @@ -202,7 +202,7 @@ float16_t *pre_storefp16_0 (float16_t *a, float16x8_t
> v)
>  /*
>  **pre_store16_0:
>  **	...
> -**	vstrh.16	q[0-9]+, \[r0, #254\]!
> +**	vstrh.16	q[0-7]+, \[r0, #254\]!
>  **	...
>  */
>  uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t v)
> @@ -215,7 +215,7 @@ uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t v)
>  /*
>  **pre_store16_1:
>  **	...
> -**	vstrh.32	q[0-9]+, \[r0, #-52\]!
> +**	vstrh.32	q[0-7]+, \[r0, #-52\]!
>  **	...
>  */
>  int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
> @@ -228,7 +228,7 @@ int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
>  /*
>  **pre_storefp32_0:
>  **	...
> -**	vstrw.32	q0, \[r0, #-4\]!
> +**	vstrw.32	q[0-7], \[r0, #-4\]!
>  **	...
>  */
>  float32_t *pre_storefp32_0 (float32_t *a, float32x4_t v)
> @@ -241,7 +241,7 @@ float32_t *pre_storefp32_0 (float32_t *a, float32x4_t
> v)
>  /*
>  **pre_store32_0:
>  **	...
> -**	vstrw.32	q[0-9]+, \[r0, #4\]!
> +**	vstrw.32	q[0-7]+, \[r0, #4\]!
>  **	...
>  */
>  int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
> @@ -255,7 +255,7 @@ int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
>  /*
>  **post_store8_0:
>  **	...
> -**	vstrb.8	q[0-9]+, \[r0\], #-26
> +**	vstrb.8	q[0-7]+, \[r0\], #-26
>  **	...
>  */
>  int8_t* post_store8_0 (int8_t * a, int8x16_t v)
> @@ -268,7 +268,7 @@ int8_t* post_store8_0 (int8_t * a, int8x16_t v)
>  /*
>  **post_store8_1:
>  **	...
> -**	vstrb.16	q[0-9]+, \[r0\], #1
> +**	vstrb.16	q[0-7]+, \[r0\], #1
>  **	...
>  */
>  uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
> @@ -281,7 +281,7 @@ uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
>  /*
>  **post_store8_2:
>  **	...
> -**	vstrb.8	q[0-9]+, \[r0\], #-26
> +**	vstrb.8	q[0-7]+, \[r0\], #-26
>  **	...
>  */
>  int8_t* post_store8_2 (int8_t * a, int8x16_t v)
> @@ -294,7 +294,7 @@ int8_t* post_store8_2 (int8_t * a, int8x16_t v)
>  /*
>  **post_store8_3:
>  **	...
> -**	vstrb.16	q[0-9]+, \[r0\], #7
> +**	vstrb.16	q[0-7]+, \[r0\], #7
>  **	...
>  */
>  uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
> @@ -307,7 +307,7 @@ uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
>  /*
>  **post_storefp16_0:
>  **	...
> -**	vstrh.16	q[0-9]+, \[r0\], #-16
> +**	vstrh.16	q[0-7]+, \[r0\], #-16
>  **	...
>  */
>  float16_t *post_storefp16_0 (float16_t *a, float16x8_t v)
> @@ -320,7 +320,7 @@ float16_t *post_storefp16_0 (float16_t *a,
> float16x8_t v)
>  /*
>  **post_store16_0:
>  **	...
> -**	vstrh.16	q[0-9]+, \[r0\], #126
> +**	vstrh.16	q[0-7]+, \[r0\], #126
>  **	...
>  */
>  int16_t* post_store16_0 (int16_t * a, int16x8_t v)
> @@ -333,7 +333,7 @@ int16_t* post_store16_0 (int16_t * a, int16x8_t v)
>  /*
>  **post_store16_1:
>  **	...
> -**	vstrh.32	q[0-9]+, \[r0\], #-16
> +**	vstrh.32	q[0-7]+, \[r0\], #-16
>  **	...
>  */
>  uint16_t* post_store16_1 (uint16_t * a, uint32x4_t v)
> @@ -346,7 +346,7 @@ uint16_t* post_store16_1 (uint16_t * a, uint32x4_t v)
>  /*
>  **post_storefp32_0:
>  **	...
> -**	vstrw.32	q[0-9]+, \[r0\], #-16
> +**	vstrw.32	q[0-7]+, \[r0\], #-16
>  **	...
>  */
>  float32_t* post_storefp32_0 (float32_t * a, float32x4_t v)
> @@ -359,7 +359,7 @@ float32_t* post_storefp32_0 (float32_t * a,
> float32x4_t v)
>  /*
>  **post_store32_0:
>  **	...
> -**	vstrw.32	q[0-9]+, \[r0\], #16
> +**	vstrw.32	q[0-7]+, \[r0\], #16
>  **	...
>  */
>  int32_t* post_store32_0 (int32_t * a, int32x4_t v)
> --
> 2.34.1
  
Christophe Lyon Sept. 29, 2022, 3:23 p.m. UTC | #2
On 9/29/22 17:01, Kyrylo Tkachov wrote:
> Hi Christophe,
> 
>> -----Original Message-----
>> From: Christophe Lyon <christophe.lyon@arm.com>
>> Sent: Thursday, September 29, 2022 3:57 PM
>> To: gcc-patches@gcc.gnu.org
>> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Andre Simoes Dias Vieira
>> <Andre.SimoesDiasVieira@arm.com>; Christophe Lyon
>> <Christophe.Lyon@arm.com>
>> Subject: [PATCH] testsuite: [arm] Relax expected register names in MVE tests
>>
>> These two tests have hardcoded q0 as destination/source of load/store
>> instructions, but it is actually used only under
>> -mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
>> (eg. q3) can be used to transfer function arguments from core
>> registers to MVE registers, making the expected regexp fail.
>>
>> This small patch replaces q0 with q[0-7] to accept any 'q' register.
>>
>> OK for trunk?
>>
>> Thanks,
>>
>> Christophe
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* gcc.target/arm/mve/mve_load_memory_modes.c: Update
>> expected
>> 	registers.
>> 	* gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.
>> ---
>>   .../arm/mve/mve_load_memory_modes.c           | 58 +++++++++----------
>>   .../arm/mve/mve_store_memory_modes.c          | 58 +++++++++----------
>>   2 files changed, 58 insertions(+), 58 deletions(-)
>>
>> diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
>> b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
>> index e35eb1108aa..fa05fdcefec 100644
>> --- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
>> +++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
>> @@ -7,7 +7,7 @@
>>   /*
>>   **off_load8_0:
>>   **	...
>> -**	vldrb.8	q0, \[r0, #16\]
>> +**	vldrb.8	q[0-7], \[r0, #16\]
>>   **	...
>>   */
>>   int8x16_t off_load8_0 (int8_t * a)
>> @@ -18,7 +18,7 @@ int8x16_t off_load8_0 (int8_t * a)
>>   /*
>>   **off_load8_1:
>>   **	...
>> -**	vldrb.u16	q0, \[r0, #1\]
>> +**	vldrb.u16	q[0-7], \[r0, #1\]
>>   **	...
>>   */
>>   uint16x8_t off_load8_1 (uint8_t * a)
>> @@ -29,7 +29,7 @@ uint16x8_t off_load8_1 (uint8_t * a)
>>   /*
>>   **off_load8_2:
>>   **	...
>> -**	vldrb.s32	q0, \[r0, #127\]
>> +**	vldrb.s32	q[0-7], \[r0, #127\]
>>   **	...
>>   */
>>   int32x4_t off_load8_2 (int8_t * a)
>> @@ -40,7 +40,7 @@ int32x4_t off_load8_2 (int8_t * a)
>>   /*
>>   **off_load8_3:
>>   **	...
>> -**	vldrb.8	q0, \[r0, #-127\]
>> +**	vldrb.8	q[0-7], \[r0, #-127\]
>>   **	...
>>   */
>>   uint8x16_t off_load8_3 (uint8_t * a)
>> @@ -51,7 +51,7 @@ uint8x16_t off_load8_3 (uint8_t * a)
>>   /*
>>   **not_off_load8_0:
>>   **	...
>> -**	vldrb.8	q0, \[r[0-9]+\]
>> +**	vldrb.8	q[0-7], \[r[0-7]+\]
>>   **	...
>>   */
>>   int8x16_t not_off_load8_0 (int8_t * a)
>> @@ -62,7 +62,7 @@ int8x16_t not_off_load8_0 (int8_t * a)
>>   /*
>>   **off_loadfp16_0:
>>   **	...
>> -**	vldrh.16	q0, \[r0, #-244\]
>> +**	vldrh.16	q[0-7], \[r0, #-244\]
>>   **	...
>>   */
>>   float16x8_t off_loadfp16_0 (float16_t *a)
>> @@ -73,7 +73,7 @@ float16x8_t off_loadfp16_0 (float16_t *a)
>>   /*
>>   **off_load16_0:
>>   **	...
>> -**	vldrh.16	q0, \[r0, #-2\]
>> +**	vldrh.16	q[0-7], \[r0, #-2\]
>>   **	...
>>   */
>>   uint16x8_t off_load16_0 (uint16_t * a)
>> @@ -84,7 +84,7 @@ uint16x8_t off_load16_0 (uint16_t * a)
>>   /*
>>   **off_load16_1:
>>   **	...
>> -**	vldrh.u32	q0, \[r0, #254\]
>> +**	vldrh.u32	q[0-7], \[r0, #254\]
>>   **	...
>>   */
>>   uint32x4_t off_load16_1 (uint16_t * a)
>> @@ -95,7 +95,7 @@ uint32x4_t off_load16_1 (uint16_t * a)
>>   /*
>>   **not_off_load16_0:
>>   **	...
>> -**	vldrh.16	q0, \[r[0-9]+\]
>> +**	vldrh.16	q[0-7], \[r[0-7]+\]
>>   **	...
>>   */
>>   int16x8_t not_off_load16_0 (int8_t * a)
>> @@ -106,7 +106,7 @@ int16x8_t not_off_load16_0 (int8_t * a)
>>   /*
>>   **not_off_load16_1:
>>   **	...
>> -**	vldrh.u32	q0, \[r[0-9]+\]
>> +**	vldrh.u32	q[0-7], \[r[0-7]+\]
>>   **	...
>>   */
>>   uint32x4_t not_off_load16_1 (uint16_t * a)
>> @@ -117,7 +117,7 @@ uint32x4_t not_off_load16_1 (uint16_t * a)
>>   /*
>>   **off_loadfp32_0:
>>   **	...
>> -**	vldrw.32	q0, \[r0, #24\]
>> +**	vldrw.32	q[0-7], \[r0, #24\]
>>   **	...
>>   */
>>   float32x4_t off_loadfp32_0 (float32_t *a)
>> @@ -128,7 +128,7 @@ float32x4_t off_loadfp32_0 (float32_t *a)
>>   /*
>>   **off_load32_0:
>>   **	...
>> -**	vldrw.32	q0, \[r0, #4\]
>> +**	vldrw.32	q[0-7], \[r0, #4\]
>>   **	...
>>   */
>>   uint32x4_t off_load32_0 (uint32_t * a)
>> @@ -139,7 +139,7 @@ uint32x4_t off_load32_0 (uint32_t * a)
>>   /*
>>   **off_load32_1:
>>   **	...
>> -**	vldrw.32	q0, \[r0, #-508\]
>> +**	vldrw.32	q[0-7], \[r0, #-508\]
>>   **	...
>>   */
> 
> These make sense....
> 
>>   int32x4_t off_load32_1 (int32_t * a)
>> @@ -149,7 +149,7 @@ int32x4_t off_load32_1 (int32_t * a)
>>   /*
>>   **pre_load8_0:
>>   **	...
>> -**	vldrb.8	q[0-9]+, \[r0, #16\]!
>> +**	vldrb.8	q[0-7]+, \[r0, #16\]!
>>   **	...
>>   */
> 
> 
> ... but what is the reason for these changes?

Oops, good catch.
I originally replaced all q0 with q[0-9], then realized q[0-7] was
probably better/safer. It looks like I also changed the pre-existing
[0-9] patterns and didn't notice.

So since I have to re-submit this patch, what's the best choice?
q[0-9] or q[0-7]?

Thanks,

Christophe

> Thanks,
> Kyrill
> 
>>   int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
>> @@ -162,7 +162,7 @@ int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
>>   /*
>>   **pre_load8_1:
>>   **	...
>> -**	vldrb.u16	q[0-9]+, \[r0, #4\]!
>> +**	vldrb.u16	q[0-7]+, \[r0, #4\]!
>>   **	...
>>   */
>>   uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
>> @@ -175,7 +175,7 @@ uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
>>   /*
>>   **pre_loadfp16_0:
>>   **	...
>> -**	vldrh.16	q[0-9]+, \[r0, #128\]!
>> +**	vldrh.16	q[0-7]+, \[r0, #128\]!
>>   **	...
>>   */
>>   float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t *v)
>> @@ -188,7 +188,7 @@ float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t
>> *v)
>>   /*
>>   **pre_load16_0:
>>   **	...
>> -**	vldrh.16	q[0-9]+, \[r0, #-254\]!
>> +**	vldrh.16	q[0-7]+, \[r0, #-254\]!
>>   **	...
>>   */
>>   int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
>> @@ -201,7 +201,7 @@ int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
>>   /*
>>   **pre_load16_1:
>>   **	...
>> -**	vldrh.s32	q[0-9]+, \[r0, #52\]!
>> +**	vldrh.s32	q[0-7]+, \[r0, #52\]!
>>   **	...
>>   */
>>   int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
>> @@ -214,7 +214,7 @@ int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
>>   /*
>>   **pre_loadfp32_0:
>>   **	...
>> -**	vldrw.32	q[0-9]+, \[r0, #-72\]!
>> +**	vldrw.32	q[0-7]+, \[r0, #-72\]!
>>   **	...
>>   */
>>   float32_t* pre_loadfp32_0 (float32_t *a, float32x4_t *v)
>> @@ -228,7 +228,7 @@ float32_t* pre_loadfp32_0 (float32_t *a, float32x4_t
>> *v)
>>   /*
>>   **pre_load32_0:
>>   **	...
>> -**	vldrw.32	q[0-9]+, \[r0, #-4\]!
>> +**	vldrw.32	q[0-7]+, \[r0, #-4\]!
>>   **	...
>>   */
>>   uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t *v)
>> @@ -242,7 +242,7 @@ uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t *v)
>>   /*
>>   **post_load8_0:
>>   **	...
>> -**	vldrb.8	q[0-9]+, \[r0\], #26
>> +**	vldrb.8	q[0-7]+, \[r0\], #26
>>   **	...
>>   */
>>   uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
>> @@ -255,7 +255,7 @@ uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
>>   /*
>>   **post_load8_1:
>>   **	...
>> -**	vldrb.s16	q[0-9]+, \[r0\], #-1
>> +**	vldrb.s16	q[0-7]+, \[r0\], #-1
>>   **	...
>>   */
>>   int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
>> @@ -268,7 +268,7 @@ int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
>>   /*
>>   **post_load8_2:
>>   **	...
>> -**	vldrb.8	q[0-9]+, \[r0\], #26
>> +**	vldrb.8	q[0-7]+, \[r0\], #26
>>   **	...
>>   */
>>   uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
>> @@ -281,7 +281,7 @@ uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
>>   /*
>>   **post_load8_3:
>>   **	...
>> -**	vldrb.s16	q[0-9]+, \[r0\], #-1
>> +**	vldrb.s16	q[0-7]+, \[r0\], #-1
>>   **	...
>>   */
>>   int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
>> @@ -294,7 +294,7 @@ int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
>>   /*
>>   **post_loadfp16_0:
>>   **	...
>> -**	vldrh.16	q[0-9]+, \[r0\], #-24
>> +**	vldrh.16	q[0-7]+, \[r0\], #-24
>>   **	...
>>   */
>>   float16_t* post_loadfp16_0 (float16_t *a, float16x8_t *v)
>> @@ -307,7 +307,7 @@ float16_t* post_loadfp16_0 (float16_t *a, float16x8_t
>> *v)
>>   /*
>>   **post_load16_0:
>>   **	...
>> -**	vldrh.16	q[0-9]+, \[r0\], #-126
>> +**	vldrh.16	q[0-7]+, \[r0\], #-126
>>   **	...
>>   */
>>   uint16_t* post_load16_0 (uint16_t * a, uint16x8_t *v)
>> @@ -320,7 +320,7 @@ uint16_t* post_load16_0 (uint16_t * a, uint16x8_t
>> *v)
>>   /*
>>   **post_load16_1:
>>   **	...
>> -**	vldrh.u32	q[0-9]+, \[r0\], #16
>> +**	vldrh.u32	q[0-7]+, \[r0\], #16
>>   **	...
>>   */
>>   uint16_t* post_load16_1 (uint16_t * a, uint32x4_t *v)
>> @@ -333,7 +333,7 @@ uint16_t* post_load16_1 (uint16_t * a, uint32x4_t
>> *v)
>>   /*
>>   **post_loadfp32_0:
>>   **	...
>> -**	vldrw.32	q[0-9]+, \[r0\], #4
>> +**	vldrw.32	q[0-7]+, \[r0\], #4
>>   **	...
>>   */
>>   float32_t* post_loadfp32_0 (float32_t *a, float32x4_t *v)
>> @@ -346,7 +346,7 @@ float32_t* post_loadfp32_0 (float32_t *a, float32x4_t
>> *v)
>>   /*
>>   **post_load32_0:
>>   **	...
>> -**	vldrw.32	q[0-9]+, \[r0\], #-16
>> +**	vldrw.32	q[0-7]+, \[r0\], #-16
>>   **	...
>>   */
>>   int32_t* post_load32_0 (int32_t * a, int32x4_t *v)
>> diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
>> b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
>> index 632f5b44f0b..2fc62916657 100644
>> --- a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
>> +++ b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
>> @@ -7,7 +7,7 @@
>>   /*
>>   **off_store8_0:
>>   **	...
>> -**	vstrb.8	q0, \[r0, #16\]
>> +**	vstrb.8	q[0-7], \[r0, #16\]
>>   **	...
>>   */
>>   uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
>> @@ -19,7 +19,7 @@ uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
>>   /*
>>   **off_store8_1:
>>   **	...
>> -**	vstrb.16	q0, \[r0, #-1\]
>> +**	vstrb.16	q[0-7], \[r0, #-1\]
>>   **	...
>>   */
>>   int8_t *off_store8_1 (int8_t * a, int16x8_t v)
>> @@ -31,7 +31,7 @@ int8_t *off_store8_1 (int8_t * a, int16x8_t v)
>>   /*
>>   **off_store8_2:
>>   **	...
>> -**	vstrb.32	q0, \[r0, #-127\]
>> +**	vstrb.32	q[0-7], \[r0, #-127\]
>>   **	...
>>   */
>>   uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
>> @@ -43,7 +43,7 @@ uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
>>   /*
>>   **off_store8_3:
>>   **	...
>> -**	vstrb.8	q0, \[r0, #127\]
>> +**	vstrb.8	q[0-7], \[r0, #127\]
>>   **	...
>>   */
>>   int8_t *off_store8_3 (int8_t * a, int8x16_t v)
>> @@ -55,7 +55,7 @@ int8_t *off_store8_3 (int8_t * a, int8x16_t v)
>>   /*
>>   **not_off_store8_0:
>>   **	...
>> -**	vstrb.8	q0, \[r[0-9]+\]
>> +**	vstrb.8	q[0-7], \[r[0-7]+\]
>>   **	...
>>   */
>>   uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
>> @@ -67,7 +67,7 @@ uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
>>   /*
>>   **off_storefp16_0:
>>   **	...
>> -**	vstrh.16	q0, \[r0, #250\]
>> +**	vstrh.16	q[0-7], \[r0, #250\]
>>   **	...
>>   */
>>   float16_t *off_storefp16_0 (float16_t *a, float16x8_t v)
>> @@ -79,7 +79,7 @@ float16_t *off_storefp16_0 (float16_t *a, float16x8_t v)
>>   /*
>>   **off_store16_0:
>>   **	...
>> -**	vstrh.16	q0, \[r0, #4\]
>> +**	vstrh.16	q[0-7], \[r0, #4\]
>>   **	...
>>   */
>>   int16_t *off_store16_0 (int16_t * a, int16x8_t v)
>> @@ -91,7 +91,7 @@ int16_t *off_store16_0 (int16_t * a, int16x8_t v)
>>   /*
>>   **off_store16_1:
>>   **	...
>> -**	vstrh.32	q0, \[r0, #-254\]
>> +**	vstrh.32	q[0-7], \[r0, #-254\]
>>   **	...
>>   */
>>   int16_t *off_store16_1 (int16_t * a, int32x4_t v)
>> @@ -103,7 +103,7 @@ int16_t *off_store16_1 (int16_t * a, int32x4_t v)
>>   /*
>>   **not_off_store16_0:
>>   **	...
>> -**	vstrh.16	q0, \[r[0-9]+\]
>> +**	vstrh.16	q[0-7], \[r[0-7]+\]
>>   **	...
>>   */
>>   uint8_t *not_off_store16_0 (uint8_t * a, uint16x8_t v)
>> @@ -115,7 +115,7 @@ uint8_t *not_off_store16_0 (uint8_t * a, uint16x8_t
>> v)
>>   /*
>>   **not_off_store16_1:
>>   **	...
>> -**	vstrh.32	q0, \[r[0-9]+\]
>> +**	vstrh.32	q[0-7], \[r[0-7]+\]
>>   **	...
>>   */
>>   int16_t *not_off_store16_1 (int16_t * a, int32x4_t v)
>> @@ -127,7 +127,7 @@ int16_t *not_off_store16_1 (int16_t * a, int32x4_t v)
>>   /*
>>   **off_storefp32_0:
>>   **	...
>> -**	vstrw.32	q0, \[r0, #-412\]
>> +**	vstrw.32	q[0-7], \[r0, #-412\]
>>   **	...
>>   */
>>   float32_t *off_storefp32_0 (float32_t *a, float32x4_t v)
>> @@ -139,7 +139,7 @@ float32_t *off_storefp32_0 (float32_t *a, float32x4_t
>> v)
>>   /*
>>   **off_store32_0:
>>   **	...
>> -**	vstrw.32	q0, \[r0, #-4\]
>> +**	vstrw.32	q[0-7], \[r0, #-4\]
>>   **	...
>>   */
>>   int32_t *off_store32_0 (int32_t * a, int32x4_t v)
>> @@ -151,7 +151,7 @@ int32_t *off_store32_0 (int32_t * a, int32x4_t v)
>>   /*
>>   **off_store32_1:
>>   **	...
>> -**	vstrw.32	q0, \[r0, #508\]
>> +**	vstrw.32	q[0-7], \[r0, #508\]
>>   **	...
>>   */
>>   uint32_t *off_store32_1 (uint32_t * a, uint32x4_t v)
>> @@ -163,7 +163,7 @@ uint32_t *off_store32_1 (uint32_t * a, uint32x4_t v)
>>   /*
>>   **pre_store8_0:
>>   **	...
>> -**	vstrb.8	q[0-9]+, \[r0, #-16\]!
>> +**	vstrb.8	q[0-7]+, \[r0, #-16\]!
>>   **	...
>>   */
>>   uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
>> @@ -176,7 +176,7 @@ uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
>>   /*
>>   **pre_store8_1:
>>   **	...
>> -**	vstrb.16	q[0-9]+, \[r0, #4\]!
>> +**	vstrb.16	q[0-7]+, \[r0, #4\]!
>>   **	...
>>   */
>>   int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
>> @@ -189,7 +189,7 @@ int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
>>   /*
>>   **pre_storefp16_0:
>>   **	...
>> -**	vstrh.16	q0, \[r0, #8\]!
>> +**	vstrh.16	q[0-7], \[r0, #8\]!
>>   **	...
>>   */
>>   float16_t *pre_storefp16_0 (float16_t *a, float16x8_t v)
>> @@ -202,7 +202,7 @@ float16_t *pre_storefp16_0 (float16_t *a, float16x8_t
>> v)
>>   /*
>>   **pre_store16_0:
>>   **	...
>> -**	vstrh.16	q[0-9]+, \[r0, #254\]!
>> +**	vstrh.16	q[0-7]+, \[r0, #254\]!
>>   **	...
>>   */
>>   uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t v)
>> @@ -215,7 +215,7 @@ uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t v)
>>   /*
>>   **pre_store16_1:
>>   **	...
>> -**	vstrh.32	q[0-9]+, \[r0, #-52\]!
>> +**	vstrh.32	q[0-7]+, \[r0, #-52\]!
>>   **	...
>>   */
>>   int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
>> @@ -228,7 +228,7 @@ int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
>>   /*
>>   **pre_storefp32_0:
>>   **	...
>> -**	vstrw.32	q0, \[r0, #-4\]!
>> +**	vstrw.32	q[0-7], \[r0, #-4\]!
>>   **	...
>>   */
>>   float32_t *pre_storefp32_0 (float32_t *a, float32x4_t v)
>> @@ -241,7 +241,7 @@ float32_t *pre_storefp32_0 (float32_t *a, float32x4_t
>> v)
>>   /*
>>   **pre_store32_0:
>>   **	...
>> -**	vstrw.32	q[0-9]+, \[r0, #4\]!
>> +**	vstrw.32	q[0-7]+, \[r0, #4\]!
>>   **	...
>>   */
>>   int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
>> @@ -255,7 +255,7 @@ int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
>>   /*
>>   **post_store8_0:
>>   **	...
>> -**	vstrb.8	q[0-9]+, \[r0\], #-26
>> +**	vstrb.8	q[0-7]+, \[r0\], #-26
>>   **	...
>>   */
>>   int8_t* post_store8_0 (int8_t * a, int8x16_t v)
>> @@ -268,7 +268,7 @@ int8_t* post_store8_0 (int8_t * a, int8x16_t v)
>>   /*
>>   **post_store8_1:
>>   **	...
>> -**	vstrb.16	q[0-9]+, \[r0\], #1
>> +**	vstrb.16	q[0-7]+, \[r0\], #1
>>   **	...
>>   */
>>   uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
>> @@ -281,7 +281,7 @@ uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
>>   /*
>>   **post_store8_2:
>>   **	...
>> -**	vstrb.8	q[0-9]+, \[r0\], #-26
>> +**	vstrb.8	q[0-7]+, \[r0\], #-26
>>   **	...
>>   */
>>   int8_t* post_store8_2 (int8_t * a, int8x16_t v)
>> @@ -294,7 +294,7 @@ int8_t* post_store8_2 (int8_t * a, int8x16_t v)
>>   /*
>>   **post_store8_3:
>>   **	...
>> -**	vstrb.16	q[0-9]+, \[r0\], #7
>> +**	vstrb.16	q[0-7]+, \[r0\], #7
>>   **	...
>>   */
>>   uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
>> @@ -307,7 +307,7 @@ uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
>>   /*
>>   **post_storefp16_0:
>>   **	...
>> -**	vstrh.16	q[0-9]+, \[r0\], #-16
>> +**	vstrh.16	q[0-7]+, \[r0\], #-16
>>   **	...
>>   */
>>   float16_t *post_storefp16_0 (float16_t *a, float16x8_t v)
>> @@ -320,7 +320,7 @@ float16_t *post_storefp16_0 (float16_t *a,
>> float16x8_t v)
>>   /*
>>   **post_store16_0:
>>   **	...
>> -**	vstrh.16	q[0-9]+, \[r0\], #126
>> +**	vstrh.16	q[0-7]+, \[r0\], #126
>>   **	...
>>   */
>>   int16_t* post_store16_0 (int16_t * a, int16x8_t v)
>> @@ -333,7 +333,7 @@ int16_t* post_store16_0 (int16_t * a, int16x8_t v)
>>   /*
>>   **post_store16_1:
>>   **	...
>> -**	vstrh.32	q[0-9]+, \[r0\], #-16
>> +**	vstrh.32	q[0-7]+, \[r0\], #-16
>>   **	...
>>   */
>>   uint16_t* post_store16_1 (uint16_t * a, uint32x4_t v)
>> @@ -346,7 +346,7 @@ uint16_t* post_store16_1 (uint16_t * a, uint32x4_t v)
>>   /*
>>   **post_storefp32_0:
>>   **	...
>> -**	vstrw.32	q[0-9]+, \[r0\], #-16
>> +**	vstrw.32	q[0-7]+, \[r0\], #-16
>>   **	...
>>   */
>>   float32_t* post_storefp32_0 (float32_t * a, float32x4_t v)
>> @@ -359,7 +359,7 @@ float32_t* post_storefp32_0 (float32_t * a,
>> float32x4_t v)
>>   /*
>>   **post_store32_0:
>>   **	...
>> -**	vstrw.32	q[0-9]+, \[r0\], #16
>> +**	vstrw.32	q[0-7]+, \[r0\], #16
>>   **	...
>>   */
>>   int32_t* post_store32_0 (int32_t * a, int32x4_t v)
>> --
>> 2.34.1
>
  
Kyrylo Tkachov Sept. 29, 2022, 3:32 p.m. UTC | #3
> -----Original Message-----
> From: Christophe Lyon <Christophe.Lyon@arm.com>
> Sent: Thursday, September 29, 2022 4:24 PM
> To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; gcc-patches@gcc.gnu.org
> Cc: Andre Simoes Dias Vieira <Andre.SimoesDiasVieira@arm.com>
> Subject: Re: [PATCH] testsuite: [arm] Relax expected register names in MVE
> tests
> 
> 
> 
> On 9/29/22 17:01, Kyrylo Tkachov wrote:
> > Hi Christophe,
> >
> >> -----Original Message-----
> >> From: Christophe Lyon <christophe.lyon@arm.com>
> >> Sent: Thursday, September 29, 2022 3:57 PM
> >> To: gcc-patches@gcc.gnu.org
> >> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Andre Simoes Dias Vieira
> >> <Andre.SimoesDiasVieira@arm.com>; Christophe Lyon
> >> <Christophe.Lyon@arm.com>
> >> Subject: [PATCH] testsuite: [arm] Relax expected register names in MVE
> tests
> >>
> >> These two tests have hardcoded q0 as destination/source of load/store
> >> instructions, but it is actually used only under
> >> -mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
> >> (eg. q3) can be used to transfer function arguments from core
> >> registers to MVE registers, making the expected regexp fail.
> >>
> >> This small patch replaces q0 with q[0-7] to accept any 'q' register.
> >>
> >> OK for trunk?
> >>
> >> Thanks,
> >>
> >> Christophe
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >> 	* gcc.target/arm/mve/mve_load_memory_modes.c: Update
> >> expected
> >> 	registers.
> >> 	* gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.
> >> ---
> >>   .../arm/mve/mve_load_memory_modes.c           | 58 +++++++++----------
> >>   .../arm/mve/mve_store_memory_modes.c          | 58 +++++++++----------
> >>   2 files changed, 58 insertions(+), 58 deletions(-)
> >>
> >> diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> >> b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> >> index e35eb1108aa..fa05fdcefec 100644
> >> --- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> >> +++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> >> @@ -7,7 +7,7 @@
> >>   /*
> >>   **off_load8_0:
> >>   **	...
> >> -**	vldrb.8	q0, \[r0, #16\]
> >> +**	vldrb.8	q[0-7], \[r0, #16\]
> >>   **	...
> >>   */
> >>   int8x16_t off_load8_0 (int8_t * a)
> >> @@ -18,7 +18,7 @@ int8x16_t off_load8_0 (int8_t * a)
> >>   /*
> >>   **off_load8_1:
> >>   **	...
> >> -**	vldrb.u16	q0, \[r0, #1\]
> >> +**	vldrb.u16	q[0-7], \[r0, #1\]
> >>   **	...
> >>   */
> >>   uint16x8_t off_load8_1 (uint8_t * a)
> >> @@ -29,7 +29,7 @@ uint16x8_t off_load8_1 (uint8_t * a)
> >>   /*
> >>   **off_load8_2:
> >>   **	...
> >> -**	vldrb.s32	q0, \[r0, #127\]
> >> +**	vldrb.s32	q[0-7], \[r0, #127\]
> >>   **	...
> >>   */
> >>   int32x4_t off_load8_2 (int8_t * a)
> >> @@ -40,7 +40,7 @@ int32x4_t off_load8_2 (int8_t * a)
> >>   /*
> >>   **off_load8_3:
> >>   **	...
> >> -**	vldrb.8	q0, \[r0, #-127\]
> >> +**	vldrb.8	q[0-7], \[r0, #-127\]
> >>   **	...
> >>   */
> >>   uint8x16_t off_load8_3 (uint8_t * a)
> >> @@ -51,7 +51,7 @@ uint8x16_t off_load8_3 (uint8_t * a)
> >>   /*
> >>   **not_off_load8_0:
> >>   **	...
> >> -**	vldrb.8	q0, \[r[0-9]+\]
> >> +**	vldrb.8	q[0-7], \[r[0-7]+\]
> >>   **	...
> >>   */
> >>   int8x16_t not_off_load8_0 (int8_t * a)
> >> @@ -62,7 +62,7 @@ int8x16_t not_off_load8_0 (int8_t * a)
> >>   /*
> >>   **off_loadfp16_0:
> >>   **	...
> >> -**	vldrh.16	q0, \[r0, #-244\]
> >> +**	vldrh.16	q[0-7], \[r0, #-244\]
> >>   **	...
> >>   */
> >>   float16x8_t off_loadfp16_0 (float16_t *a)
> >> @@ -73,7 +73,7 @@ float16x8_t off_loadfp16_0 (float16_t *a)
> >>   /*
> >>   **off_load16_0:
> >>   **	...
> >> -**	vldrh.16	q0, \[r0, #-2\]
> >> +**	vldrh.16	q[0-7], \[r0, #-2\]
> >>   **	...
> >>   */
> >>   uint16x8_t off_load16_0 (uint16_t * a)
> >> @@ -84,7 +84,7 @@ uint16x8_t off_load16_0 (uint16_t * a)
> >>   /*
> >>   **off_load16_1:
> >>   **	...
> >> -**	vldrh.u32	q0, \[r0, #254\]
> >> +**	vldrh.u32	q[0-7], \[r0, #254\]
> >>   **	...
> >>   */
> >>   uint32x4_t off_load16_1 (uint16_t * a)
> >> @@ -95,7 +95,7 @@ uint32x4_t off_load16_1 (uint16_t * a)
> >>   /*
> >>   **not_off_load16_0:
> >>   **	...
> >> -**	vldrh.16	q0, \[r[0-9]+\]
> >> +**	vldrh.16	q[0-7], \[r[0-7]+\]
> >>   **	...
> >>   */
> >>   int16x8_t not_off_load16_0 (int8_t * a)
> >> @@ -106,7 +106,7 @@ int16x8_t not_off_load16_0 (int8_t * a)
> >>   /*
> >>   **not_off_load16_1:
> >>   **	...
> >> -**	vldrh.u32	q0, \[r[0-9]+\]
> >> +**	vldrh.u32	q[0-7], \[r[0-7]+\]
> >>   **	...
> >>   */
> >>   uint32x4_t not_off_load16_1 (uint16_t * a)
> >> @@ -117,7 +117,7 @@ uint32x4_t not_off_load16_1 (uint16_t * a)
> >>   /*
> >>   **off_loadfp32_0:
> >>   **	...
> >> -**	vldrw.32	q0, \[r0, #24\]
> >> +**	vldrw.32	q[0-7], \[r0, #24\]
> >>   **	...
> >>   */
> >>   float32x4_t off_loadfp32_0 (float32_t *a)
> >> @@ -128,7 +128,7 @@ float32x4_t off_loadfp32_0 (float32_t *a)
> >>   /*
> >>   **off_load32_0:
> >>   **	...
> >> -**	vldrw.32	q0, \[r0, #4\]
> >> +**	vldrw.32	q[0-7], \[r0, #4\]
> >>   **	...
> >>   */
> >>   uint32x4_t off_load32_0 (uint32_t * a)
> >> @@ -139,7 +139,7 @@ uint32x4_t off_load32_0 (uint32_t * a)
> >>   /*
> >>   **off_load32_1:
> >>   **	...
> >> -**	vldrw.32	q0, \[r0, #-508\]
> >> +**	vldrw.32	q[0-7], \[r0, #-508\]
> >>   **	...
> >>   */
> >
> > These make sense....
> >
> >>   int32x4_t off_load32_1 (int32_t * a)
> >> @@ -149,7 +149,7 @@ int32x4_t off_load32_1 (int32_t * a)
> >>   /*
> >>   **pre_load8_0:
> >>   **	...
> >> -**	vldrb.8	q[0-9]+, \[r0, #16\]!
> >> +**	vldrb.8	q[0-7]+, \[r0, #16\]!
> >>   **	...
> >>   */
> >
> >
> > ... but what is the reason for these changes?
> 
> Oops, good catch.
> I originally replaced all q0 with q[0-9], then realized q[0-7] was
> probably better/safer. It looks like I also changed the pre-existing
> [0-9] patterns and didn't notice.
> 
> So since I have to re-submit this patch, what's the best choice?
> q[0-9] or q[0-7]?

I just remembered that MVE has only Q0-Q7 (unlike Neon), so q[0-7] would be correct here, without the '+' (a single digit is enough to name any MVE Q register).
Thanks,
Kyrill

> 
> Thanks,
> 
> Christophe
> 
> > Thanks,
> > Kyrill
> >
> >>   int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
> >> @@ -162,7 +162,7 @@ int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
> >>   /*
> >>   **pre_load8_1:
> >>   **	...
> >> -**	vldrb.u16	q[0-9]+, \[r0, #4\]!
> >> +**	vldrb.u16	q[0-7]+, \[r0, #4\]!
> >>   **	...
> >>   */
> >>   uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
> >> @@ -175,7 +175,7 @@ uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
> >>   /*
> >>   **pre_loadfp16_0:
> >>   **	...
> >> -**	vldrh.16	q[0-9]+, \[r0, #128\]!
> >> +**	vldrh.16	q[0-7]+, \[r0, #128\]!
> >>   **	...
> >>   */
> >>   float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t *v)
> >> @@ -188,7 +188,7 @@ float16_t* pre_loadfp16_0 (float16_t *a,
> float16x8_t
> >> *v)
> >>   /*
> >>   **pre_load16_0:
> >>   **	...
> >> -**	vldrh.16	q[0-9]+, \[r0, #-254\]!
> >> +**	vldrh.16	q[0-7]+, \[r0, #-254\]!
> >>   **	...
> >>   */
> >>   int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
> >> @@ -201,7 +201,7 @@ int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
> >>   /*
> >>   **pre_load16_1:
> >>   **	...
> >> -**	vldrh.s32	q[0-9]+, \[r0, #52\]!
> >> +**	vldrh.s32	q[0-7]+, \[r0, #52\]!
> >>   **	...
> >>   */
> >>   int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
> >> @@ -214,7 +214,7 @@ int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
> >>   /*
> >>   **pre_loadfp32_0:
> >>   **	...
> >> -**	vldrw.32	q[0-9]+, \[r0, #-72\]!
> >> +**	vldrw.32	q[0-7]+, \[r0, #-72\]!
> >>   **	...
> >>   */
> >>   float32_t* pre_loadfp32_0 (float32_t *a, float32x4_t *v)
> >> @@ -228,7 +228,7 @@ float32_t* pre_loadfp32_0 (float32_t *a,
> float32x4_t
> >> *v)
> >>   /*
> >>   **pre_load32_0:
> >>   **	...
> >> -**	vldrw.32	q[0-9]+, \[r0, #-4\]!
> >> +**	vldrw.32	q[0-7]+, \[r0, #-4\]!
> >>   **	...
> >>   */
> >>   uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t *v)
> >> @@ -242,7 +242,7 @@ uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t
> *v)
> >>   /*
> >>   **post_load8_0:
> >>   **	...
> >> -**	vldrb.8	q[0-9]+, \[r0\], #26
> >> +**	vldrb.8	q[0-7]+, \[r0\], #26
> >>   **	...
> >>   */
> >>   uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
> >> @@ -255,7 +255,7 @@ uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
> >>   /*
> >>   **post_load8_1:
> >>   **	...
> >> -**	vldrb.s16	q[0-9]+, \[r0\], #-1
> >> +**	vldrb.s16	q[0-7]+, \[r0\], #-1
> >>   **	...
> >>   */
> >>   int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
> >> @@ -268,7 +268,7 @@ int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
> >>   /*
> >>   **post_load8_2:
> >>   **	...
> >> -**	vldrb.8	q[0-9]+, \[r0\], #26
> >> +**	vldrb.8	q[0-7]+, \[r0\], #26
> >>   **	...
> >>   */
> >>   uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
> >> @@ -281,7 +281,7 @@ uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
> >>   /*
> >>   **post_load8_3:
> >>   **	...
> >> -**	vldrb.s16	q[0-9]+, \[r0\], #-1
> >> +**	vldrb.s16	q[0-7]+, \[r0\], #-1
> >>   **	...
> >>   */
> >>   int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
> >> @@ -294,7 +294,7 @@ int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
> >>   /*
> >>   **post_loadfp16_0:
> >>   **	...
> >> -**	vldrh.16	q[0-9]+, \[r0\], #-24
> >> +**	vldrh.16	q[0-7]+, \[r0\], #-24
> >>   **	...
> >>   */
> >>   float16_t* post_loadfp16_0 (float16_t *a, float16x8_t *v)
> >> @@ -307,7 +307,7 @@ float16_t* post_loadfp16_0 (float16_t *a,
> float16x8_t
> >> *v)
> >>   /*
> >>   **post_load16_0:
> >>   **	...
> >> -**	vldrh.16	q[0-9]+, \[r0\], #-126
> >> +**	vldrh.16	q[0-7]+, \[r0\], #-126
> >>   **	...
> >>   */
> >>   uint16_t* post_load16_0 (uint16_t * a, uint16x8_t *v)
> >> @@ -320,7 +320,7 @@ uint16_t* post_load16_0 (uint16_t * a, uint16x8_t
> >> *v)
> >>   /*
> >>   **post_load16_1:
> >>   **	...
> >> -**	vldrh.u32	q[0-9]+, \[r0\], #16
> >> +**	vldrh.u32	q[0-7]+, \[r0\], #16
> >>   **	...
> >>   */
> >>   uint16_t* post_load16_1 (uint16_t * a, uint32x4_t *v)
> >> @@ -333,7 +333,7 @@ uint16_t* post_load16_1 (uint16_t * a, uint32x4_t
> >> *v)
> >>   /*
> >>   **post_loadfp32_0:
> >>   **	...
> >> -**	vldrw.32	q[0-9]+, \[r0\], #4
> >> +**	vldrw.32	q[0-7]+, \[r0\], #4
> >>   **	...
> >>   */
> >>   float32_t* post_loadfp32_0 (float32_t *a, float32x4_t *v)
> >> @@ -346,7 +346,7 @@ float32_t* post_loadfp32_0 (float32_t *a,
> float32x4_t
> >> *v)
> >>   /*
> >>   **post_load32_0:
> >>   **	...
> >> -**	vldrw.32	q[0-9]+, \[r0\], #-16
> >> +**	vldrw.32	q[0-7]+, \[r0\], #-16
> >>   **	...
> >>   */
> >>   int32_t* post_load32_0 (int32_t * a, int32x4_t *v)
> >> diff --git
> a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
> >> b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
> >> index 632f5b44f0b..2fc62916657 100644
> >> --- a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
> >> +++ b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
> >> @@ -7,7 +7,7 @@
> >>   /*
> >>   **off_store8_0:
> >>   **	...
> >> -**	vstrb.8	q0, \[r0, #16\]
> >> +**	vstrb.8	q[0-7], \[r0, #16\]
> >>   **	...
> >>   */
> >>   uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
> >> @@ -19,7 +19,7 @@ uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
> >>   /*
> >>   **off_store8_1:
> >>   **	...
> >> -**	vstrb.16	q0, \[r0, #-1\]
> >> +**	vstrb.16	q[0-7], \[r0, #-1\]
> >>   **	...
> >>   */
> >>   int8_t *off_store8_1 (int8_t * a, int16x8_t v)
> >> @@ -31,7 +31,7 @@ int8_t *off_store8_1 (int8_t * a, int16x8_t v)
> >>   /*
> >>   **off_store8_2:
> >>   **	...
> >> -**	vstrb.32	q0, \[r0, #-127\]
> >> +**	vstrb.32	q[0-7], \[r0, #-127\]
> >>   **	...
> >>   */
> >>   uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
> >> @@ -43,7 +43,7 @@ uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
> >>   /*
> >>   **off_store8_3:
> >>   **	...
> >> -**	vstrb.8	q0, \[r0, #127\]
> >> +**	vstrb.8	q[0-7], \[r0, #127\]
> >>   **	...
> >>   */
> >>   int8_t *off_store8_3 (int8_t * a, int8x16_t v)
> >> @@ -55,7 +55,7 @@ int8_t *off_store8_3 (int8_t * a, int8x16_t v)
> >>   /*
> >>   **not_off_store8_0:
> >>   **	...
> >> -**	vstrb.8	q0, \[r[0-9]+\]
> >> +**	vstrb.8	q[0-7], \[r[0-7]+\]
> >>   **	...
> >>   */
> >>   uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
> >> @@ -67,7 +67,7 @@ uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
> >>   /*
> >>   **off_storefp16_0:
> >>   **	...
> >> -**	vstrh.16	q0, \[r0, #250\]
> >> +**	vstrh.16	q[0-7], \[r0, #250\]
> >>   **	...
> >>   */
> >>   float16_t *off_storefp16_0 (float16_t *a, float16x8_t v)
> >> @@ -79,7 +79,7 @@ float16_t *off_storefp16_0 (float16_t *a, float16x8_t
> v)
> >>   /*
> >>   **off_store16_0:
> >>   **	...
> >> -**	vstrh.16	q0, \[r0, #4\]
> >> +**	vstrh.16	q[0-7], \[r0, #4\]
> >>   **	...
> >>   */
> >>   int16_t *off_store16_0 (int16_t * a, int16x8_t v)
> >> @@ -91,7 +91,7 @@ int16_t *off_store16_0 (int16_t * a, int16x8_t v)
> >>   /*
> >>   **off_store16_1:
> >>   **	...
> >> -**	vstrh.32	q0, \[r0, #-254\]
> >> +**	vstrh.32	q[0-7], \[r0, #-254\]
> >>   **	...
> >>   */
> >>   int16_t *off_store16_1 (int16_t * a, int32x4_t v)
> >> @@ -103,7 +103,7 @@ int16_t *off_store16_1 (int16_t * a, int32x4_t v)
> >>   /*
> >>   **not_off_store16_0:
> >>   **	...
> >> -**	vstrh.16	q0, \[r[0-9]+\]
> >> +**	vstrh.16	q[0-7], \[r[0-7]+\]
> >>   **	...
> >>   */
> >>   uint8_t *not_off_store16_0 (uint8_t * a, uint16x8_t v)
> >> @@ -115,7 +115,7 @@ uint8_t *not_off_store16_0 (uint8_t * a,
> uint16x8_t
> >> v)
> >>   /*
> >>   **not_off_store16_1:
> >>   **	...
> >> -**	vstrh.32	q0, \[r[0-9]+\]
> >> +**	vstrh.32	q[0-7], \[r[0-7]+\]
> >>   **	...
> >>   */
> >>   int16_t *not_off_store16_1 (int16_t * a, int32x4_t v)
> >> @@ -127,7 +127,7 @@ int16_t *not_off_store16_1 (int16_t * a, int32x4_t
> v)
> >>   /*
> >>   **off_storefp32_0:
> >>   **	...
> >> -**	vstrw.32	q0, \[r0, #-412\]
> >> +**	vstrw.32	q[0-7], \[r0, #-412\]
> >>   **	...
> >>   */
> >>   float32_t *off_storefp32_0 (float32_t *a, float32x4_t v)
> >> @@ -139,7 +139,7 @@ float32_t *off_storefp32_0 (float32_t *a,
> float32x4_t
> >> v)
> >>   /*
> >>   **off_store32_0:
> >>   **	...
> >> -**	vstrw.32	q0, \[r0, #-4\]
> >> +**	vstrw.32	q[0-7], \[r0, #-4\]
> >>   **	...
> >>   */
> >>   int32_t *off_store32_0 (int32_t * a, int32x4_t v)
> >> @@ -151,7 +151,7 @@ int32_t *off_store32_0 (int32_t * a, int32x4_t v)
> >>   /*
> >>   **off_store32_1:
> >>   **	...
> >> -**	vstrw.32	q0, \[r0, #508\]
> >> +**	vstrw.32	q[0-7], \[r0, #508\]
> >>   **	...
> >>   */
> >>   uint32_t *off_store32_1 (uint32_t * a, uint32x4_t v)
> >> @@ -163,7 +163,7 @@ uint32_t *off_store32_1 (uint32_t * a, uint32x4_t
> v)
> >>   /*
> >>   **pre_store8_0:
> >>   **	...
> >> -**	vstrb.8	q[0-9]+, \[r0, #-16\]!
> >> +**	vstrb.8	q[0-7]+, \[r0, #-16\]!
> >>   **	...
> >>   */
> >>   uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
> >> @@ -176,7 +176,7 @@ uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
> >>   /*
> >>   **pre_store8_1:
> >>   **	...
> >> -**	vstrb.16	q[0-9]+, \[r0, #4\]!
> >> +**	vstrb.16	q[0-7]+, \[r0, #4\]!
> >>   **	...
> >>   */
> >>   int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
> >> @@ -189,7 +189,7 @@ int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
> >>   /*
> >>   **pre_storefp16_0:
> >>   **	...
> >> -**	vstrh.16	q0, \[r0, #8\]!
> >> +**	vstrh.16	q[0-7], \[r0, #8\]!
> >>   **	...
> >>   */
> >>   float16_t *pre_storefp16_0 (float16_t *a, float16x8_t v)
> >> @@ -202,7 +202,7 @@ float16_t *pre_storefp16_0 (float16_t *a,
> float16x8_t
> >> v)
> >>   /*
> >>   **pre_store16_0:
> >>   **	...
> >> -**	vstrh.16	q[0-9]+, \[r0, #254\]!
> >> +**	vstrh.16	q[0-7]+, \[r0, #254\]!
> >>   **	...
> >>   */
> >>   uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t v)
> >> @@ -215,7 +215,7 @@ uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t
> v)
> >>   /*
> >>   **pre_store16_1:
> >>   **	...
> >> -**	vstrh.32	q[0-9]+, \[r0, #-52\]!
> >> +**	vstrh.32	q[0-7]+, \[r0, #-52\]!
> >>   **	...
> >>   */
> >>   int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
> >> @@ -228,7 +228,7 @@ int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
> >>   /*
> >>   **pre_storefp32_0:
> >>   **	...
> >> -**	vstrw.32	q0, \[r0, #-4\]!
> >> +**	vstrw.32	q[0-7], \[r0, #-4\]!
> >>   **	...
> >>   */
> >>   float32_t *pre_storefp32_0 (float32_t *a, float32x4_t v)
> >> @@ -241,7 +241,7 @@ float32_t *pre_storefp32_0 (float32_t *a,
> float32x4_t
> >> v)
> >>   /*
> >>   **pre_store32_0:
> >>   **	...
> >> -**	vstrw.32	q[0-9]+, \[r0, #4\]!
> >> +**	vstrw.32	q[0-7]+, \[r0, #4\]!
> >>   **	...
> >>   */
> >>   int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
> >> @@ -255,7 +255,7 @@ int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
> >>   /*
> >>   **post_store8_0:
> >>   **	...
> >> -**	vstrb.8	q[0-9]+, \[r0\], #-26
> >> +**	vstrb.8	q[0-7]+, \[r0\], #-26
> >>   **	...
> >>   */
> >>   int8_t* post_store8_0 (int8_t * a, int8x16_t v)
> >> @@ -268,7 +268,7 @@ int8_t* post_store8_0 (int8_t * a, int8x16_t v)
> >>   /*
> >>   **post_store8_1:
> >>   **	...
> >> -**	vstrb.16	q[0-9]+, \[r0\], #1
> >> +**	vstrb.16	q[0-7]+, \[r0\], #1
> >>   **	...
> >>   */
> >>   uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
> >> @@ -281,7 +281,7 @@ uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
> >>   /*
> >>   **post_store8_2:
> >>   **	...
> >> -**	vstrb.8	q[0-9]+, \[r0\], #-26
> >> +**	vstrb.8	q[0-7]+, \[r0\], #-26
> >>   **	...
> >>   */
> >>   int8_t* post_store8_2 (int8_t * a, int8x16_t v)
> >> @@ -294,7 +294,7 @@ int8_t* post_store8_2 (int8_t * a, int8x16_t v)
> >>   /*
> >>   **post_store8_3:
> >>   **	...
> >> -**	vstrb.16	q[0-9]+, \[r0\], #7
> >> +**	vstrb.16	q[0-7]+, \[r0\], #7
> >>   **	...
> >>   */
> >>   uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
> >> @@ -307,7 +307,7 @@ uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
> >>   /*
> >>   **post_storefp16_0:
> >>   **	...
> >> -**	vstrh.16	q[0-9]+, \[r0\], #-16
> >> +**	vstrh.16	q[0-7]+, \[r0\], #-16
> >>   **	...
> >>   */
> >>   float16_t *post_storefp16_0 (float16_t *a, float16x8_t v)
> >> @@ -320,7 +320,7 @@ float16_t *post_storefp16_0 (float16_t *a,
> >> float16x8_t v)
> >>   /*
> >>   **post_store16_0:
> >>   **	...
> >> -**	vstrh.16	q[0-9]+, \[r0\], #126
> >> +**	vstrh.16	q[0-7]+, \[r0\], #126
> >>   **	...
> >>   */
> >>   int16_t* post_store16_0 (int16_t * a, int16x8_t v)
> >> @@ -333,7 +333,7 @@ int16_t* post_store16_0 (int16_t * a, int16x8_t v)
> >>   /*
> >>   **post_store16_1:
> >>   **	...
> >> -**	vstrh.32	q[0-9]+, \[r0\], #-16
> >> +**	vstrh.32	q[0-7]+, \[r0\], #-16
> >>   **	...
> >>   */
> >>   uint16_t* post_store16_1 (uint16_t * a, uint32x4_t v)
> >> @@ -346,7 +346,7 @@ uint16_t* post_store16_1 (uint16_t * a,
> uint32x4_t v)
> >>   /*
> >>   **post_storefp32_0:
> >>   **	...
> >> -**	vstrw.32	q[0-9]+, \[r0\], #-16
> >> +**	vstrw.32	q[0-7]+, \[r0\], #-16
> >>   **	...
> >>   */
> >>   float32_t* post_storefp32_0 (float32_t * a, float32x4_t v)
> >> @@ -359,7 +359,7 @@ float32_t* post_storefp32_0 (float32_t * a,
> >> float32x4_t v)
> >>   /*
> >>   **post_store32_0:
> >>   **	...
> >> -**	vstrw.32	q[0-9]+, \[r0\], #16
> >> +**	vstrw.32	q[0-7]+, \[r0\], #16
> >>   **	...
> >>   */
> >>   int32_t* post_store32_0 (int32_t * a, int32x4_t v)
> >> --
> >> 2.34.1
> >
  

Patch

diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
index e35eb1108aa..fa05fdcefec 100644
--- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
+++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
@@ -7,7 +7,7 @@ 
 /*
 **off_load8_0:
 **	...
-**	vldrb.8	q0, \[r0, #16\]
+**	vldrb.8	q[0-7], \[r0, #16\]
 **	...
 */
 int8x16_t off_load8_0 (int8_t * a)
@@ -18,7 +18,7 @@  int8x16_t off_load8_0 (int8_t * a)
 /*
 **off_load8_1:
 **	...
-**	vldrb.u16	q0, \[r0, #1\]
+**	vldrb.u16	q[0-7], \[r0, #1\]
 **	...
 */
 uint16x8_t off_load8_1 (uint8_t * a)
@@ -29,7 +29,7 @@  uint16x8_t off_load8_1 (uint8_t * a)
 /*
 **off_load8_2:
 **	...
-**	vldrb.s32	q0, \[r0, #127\]
+**	vldrb.s32	q[0-7], \[r0, #127\]
 **	...
 */
 int32x4_t off_load8_2 (int8_t * a)
@@ -40,7 +40,7 @@  int32x4_t off_load8_2 (int8_t * a)
 /*
 **off_load8_3:
 **	...
-**	vldrb.8	q0, \[r0, #-127\]
+**	vldrb.8	q[0-7], \[r0, #-127\]
 **	...
 */
 uint8x16_t off_load8_3 (uint8_t * a)
@@ -51,7 +51,7 @@  uint8x16_t off_load8_3 (uint8_t * a)
 /*
 **not_off_load8_0:
 **	...
-**	vldrb.8	q0, \[r[0-9]+\]
+**	vldrb.8	q[0-7], \[r[0-7]+\]
 **	...
 */
 int8x16_t not_off_load8_0 (int8_t * a)
@@ -62,7 +62,7 @@  int8x16_t not_off_load8_0 (int8_t * a)
 /*
 **off_loadfp16_0:
 **	...
-**	vldrh.16	q0, \[r0, #-244\]
+**	vldrh.16	q[0-7], \[r0, #-244\]
 **	...
 */
 float16x8_t off_loadfp16_0 (float16_t *a)
@@ -73,7 +73,7 @@  float16x8_t off_loadfp16_0 (float16_t *a)
 /*
 **off_load16_0:
 **	...
-**	vldrh.16	q0, \[r0, #-2\]
+**	vldrh.16	q[0-7], \[r0, #-2\]
 **	...
 */
 uint16x8_t off_load16_0 (uint16_t * a)
@@ -84,7 +84,7 @@  uint16x8_t off_load16_0 (uint16_t * a)
 /*
 **off_load16_1:
 **	...
-**	vldrh.u32	q0, \[r0, #254\]
+**	vldrh.u32	q[0-7], \[r0, #254\]
 **	...
 */
 uint32x4_t off_load16_1 (uint16_t * a)
@@ -95,7 +95,7 @@  uint32x4_t off_load16_1 (uint16_t * a)
 /*
 **not_off_load16_0:
 **	...
-**	vldrh.16	q0, \[r[0-9]+\]
+**	vldrh.16	q[0-7], \[r[0-7]+\]
 **	...
 */
 int16x8_t not_off_load16_0 (int8_t * a)
@@ -106,7 +106,7 @@  int16x8_t not_off_load16_0 (int8_t * a)
 /*
 **not_off_load16_1:
 **	...
-**	vldrh.u32	q0, \[r[0-9]+\]
+**	vldrh.u32	q[0-7], \[r[0-7]+\]
 **	...
 */
 uint32x4_t not_off_load16_1 (uint16_t * a)
@@ -117,7 +117,7 @@  uint32x4_t not_off_load16_1 (uint16_t * a)
 /*
 **off_loadfp32_0:
 **	...
-**	vldrw.32	q0, \[r0, #24\]
+**	vldrw.32	q[0-7], \[r0, #24\]
 **	...
 */
 float32x4_t off_loadfp32_0 (float32_t *a)
@@ -128,7 +128,7 @@  float32x4_t off_loadfp32_0 (float32_t *a)
 /*
 **off_load32_0:
 **	...
-**	vldrw.32	q0, \[r0, #4\]
+**	vldrw.32	q[0-7], \[r0, #4\]
 **	...
 */
 uint32x4_t off_load32_0 (uint32_t * a)
@@ -139,7 +139,7 @@  uint32x4_t off_load32_0 (uint32_t * a)
 /*
 **off_load32_1:
 **	...
-**	vldrw.32	q0, \[r0, #-508\]
+**	vldrw.32	q[0-7], \[r0, #-508\]
 **	...
 */
 int32x4_t off_load32_1 (int32_t * a)
@@ -149,7 +149,7 @@  int32x4_t off_load32_1 (int32_t * a)
 /*
 **pre_load8_0:
 **	...
-**	vldrb.8	q[0-9]+, \[r0, #16\]!
+**	vldrb.8	q[0-7]+, \[r0, #16\]!
 **	...
 */
 int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
@@ -162,7 +162,7 @@  int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
 /*
 **pre_load8_1:
 **	...
-**	vldrb.u16	q[0-9]+, \[r0, #4\]!
+**	vldrb.u16	q[0-7]+, \[r0, #4\]!
 **	...
 */
 uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
@@ -175,7 +175,7 @@  uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
 /*
 **pre_loadfp16_0:
 **	...
-**	vldrh.16	q[0-9]+, \[r0, #128\]!
+**	vldrh.16	q[0-7]+, \[r0, #128\]!
 **	...
 */
 float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t *v)
@@ -188,7 +188,7 @@  float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t *v)
 /*
 **pre_load16_0:
 **	...
-**	vldrh.16	q[0-9]+, \[r0, #-254\]!
+**	vldrh.16	q[0-7]+, \[r0, #-254\]!
 **	...
 */
 int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
@@ -201,7 +201,7 @@  int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
 /*
 **pre_load16_1:
 **	...
-**	vldrh.s32	q[0-9]+, \[r0, #52\]!
+**	vldrh.s32	q[0-7]+, \[r0, #52\]!
 **	...
 */
 int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
@@ -214,7 +214,7 @@  int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
 /*
 **pre_loadfp32_0:
 **	...
-**	vldrw.32	q[0-9]+, \[r0, #-72\]!
+**	vldrw.32	q[0-7]+, \[r0, #-72\]!
 **	...
 */
 float32_t* pre_loadfp32_0 (float32_t *a, float32x4_t *v)
@@ -228,7 +228,7 @@  float32_t* pre_loadfp32_0 (float32_t *a, float32x4_t *v)
 /*
 **pre_load32_0:
 **	...
-**	vldrw.32	q[0-9]+, \[r0, #-4\]!
+**	vldrw.32	q[0-7]+, \[r0, #-4\]!
 **	...
 */
 uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t *v)
@@ -242,7 +242,7 @@  uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t *v)
 /*
 **post_load8_0:
 **	...
-**	vldrb.8	q[0-9]+, \[r0\], #26
+**	vldrb.8	q[0-7]+, \[r0\], #26
 **	...
 */
 uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
@@ -255,7 +255,7 @@  uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
 /*
 **post_load8_1:
 **	...
-**	vldrb.s16	q[0-9]+, \[r0\], #-1
+**	vldrb.s16	q[0-7]+, \[r0\], #-1
 **	...
 */
 int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
@@ -268,7 +268,7 @@  int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
 /*
 **post_load8_2:
 **	...
-**	vldrb.8	q[0-9]+, \[r0\], #26
+**	vldrb.8	q[0-7]+, \[r0\], #26
 **	...
 */
 uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
@@ -281,7 +281,7 @@  uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
 /*
 **post_load8_3:
 **	...
-**	vldrb.s16	q[0-9]+, \[r0\], #-1
+**	vldrb.s16	q[0-7]+, \[r0\], #-1
 **	...
 */
 int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
@@ -294,7 +294,7 @@  int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
 /*
 **post_loadfp16_0:
 **	...
-**	vldrh.16	q[0-9]+, \[r0\], #-24
+**	vldrh.16	q[0-7]+, \[r0\], #-24
 **	...
 */
 float16_t* post_loadfp16_0 (float16_t *a, float16x8_t *v)
@@ -307,7 +307,7 @@  float16_t* post_loadfp16_0 (float16_t *a, float16x8_t *v)
 /*
 **post_load16_0:
 **	...
-**	vldrh.16	q[0-9]+, \[r0\], #-126
+**	vldrh.16	q[0-7]+, \[r0\], #-126
 **	...
 */
 uint16_t* post_load16_0 (uint16_t * a, uint16x8_t *v)
@@ -320,7 +320,7 @@  uint16_t* post_load16_0 (uint16_t * a, uint16x8_t *v)
 /*
 **post_load16_1:
 **	...
-**	vldrh.u32	q[0-9]+, \[r0\], #16
+**	vldrh.u32	q[0-7]+, \[r0\], #16
 **	...
 */
 uint16_t* post_load16_1 (uint16_t * a, uint32x4_t *v)
@@ -333,7 +333,7 @@  uint16_t* post_load16_1 (uint16_t * a, uint32x4_t *v)
 /*
 **post_loadfp32_0:
 **	...
-**	vldrw.32	q[0-9]+, \[r0\], #4
+**	vldrw.32	q[0-7]+, \[r0\], #4
 **	...
 */
 float32_t* post_loadfp32_0 (float32_t *a, float32x4_t *v)
@@ -346,7 +346,7 @@  float32_t* post_loadfp32_0 (float32_t *a, float32x4_t *v)
 /*
 **post_load32_0:
 **	...
-**	vldrw.32	q[0-9]+, \[r0\], #-16
+**	vldrw.32	q[0-7]+, \[r0\], #-16
 **	...
 */
 int32_t* post_load32_0 (int32_t * a, int32x4_t *v)
diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
index 632f5b44f0b..2fc62916657 100644
--- a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
+++ b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
@@ -7,7 +7,7 @@ 
 /*
 **off_store8_0:
 **	...
-**	vstrb.8	q0, \[r0, #16\]
+**	vstrb.8	q[0-7], \[r0, #16\]
 **	...
 */
 uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
@@ -19,7 +19,7 @@  uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
 /*
 **off_store8_1:
 **	...
-**	vstrb.16	q0, \[r0, #-1\]
+**	vstrb.16	q[0-7], \[r0, #-1\]
 **	...
 */
 int8_t *off_store8_1 (int8_t * a, int16x8_t v)
@@ -31,7 +31,7 @@  int8_t *off_store8_1 (int8_t * a, int16x8_t v)
 /*
 **off_store8_2:
 **	...
-**	vstrb.32	q0, \[r0, #-127\]
+**	vstrb.32	q[0-7], \[r0, #-127\]
 **	...
 */
 uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
@@ -43,7 +43,7 @@  uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
 /*
 **off_store8_3:
 **	...
-**	vstrb.8	q0, \[r0, #127\]
+**	vstrb.8	q[0-7], \[r0, #127\]
 **	...
 */
 int8_t *off_store8_3 (int8_t * a, int8x16_t v)
@@ -55,7 +55,7 @@  int8_t *off_store8_3 (int8_t * a, int8x16_t v)
 /*
 **not_off_store8_0:
 **	...
-**	vstrb.8	q0, \[r[0-9]+\]
+**	vstrb.8	q[0-7], \[r[0-7]+\]
 **	...
 */
 uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
@@ -67,7 +67,7 @@  uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
 /*
 **off_storefp16_0:
 **	...
-**	vstrh.16	q0, \[r0, #250\]
+**	vstrh.16	q[0-7], \[r0, #250\]
 **	...
 */
 float16_t *off_storefp16_0 (float16_t *a, float16x8_t v)
@@ -79,7 +79,7 @@  float16_t *off_storefp16_0 (float16_t *a, float16x8_t v)
 /*
 **off_store16_0:
 **	...
-**	vstrh.16	q0, \[r0, #4\]
+**	vstrh.16	q[0-7], \[r0, #4\]
 **	...
 */
 int16_t *off_store16_0 (int16_t * a, int16x8_t v)
@@ -91,7 +91,7 @@  int16_t *off_store16_0 (int16_t * a, int16x8_t v)
 /*
 **off_store16_1:
 **	...
-**	vstrh.32	q0, \[r0, #-254\]
+**	vstrh.32	q[0-7], \[r0, #-254\]
 **	...
 */
 int16_t *off_store16_1 (int16_t * a, int32x4_t v)
@@ -103,7 +103,7 @@  int16_t *off_store16_1 (int16_t * a, int32x4_t v)
 /*
 **not_off_store16_0:
 **	...
-**	vstrh.16	q0, \[r[0-9]+\]
+**	vstrh.16	q[0-7], \[r[0-7]+\]
 **	...
 */
 uint8_t *not_off_store16_0 (uint8_t * a, uint16x8_t v)
@@ -115,7 +115,7 @@  uint8_t *not_off_store16_0 (uint8_t * a, uint16x8_t v)
 /*
 **not_off_store16_1:
 **	...
-**	vstrh.32	q0, \[r[0-9]+\]
+**	vstrh.32	q[0-7], \[r[0-7]+\]
 **	...
 */
 int16_t *not_off_store16_1 (int16_t * a, int32x4_t v)
@@ -127,7 +127,7 @@  int16_t *not_off_store16_1 (int16_t * a, int32x4_t v)
 /*
 **off_storefp32_0:
 **	...
-**	vstrw.32	q0, \[r0, #-412\]
+**	vstrw.32	q[0-7], \[r0, #-412\]
 **	...
 */
 float32_t *off_storefp32_0 (float32_t *a, float32x4_t v)
@@ -139,7 +139,7 @@  float32_t *off_storefp32_0 (float32_t *a, float32x4_t v)
 /*
 **off_store32_0:
 **	...
-**	vstrw.32	q0, \[r0, #-4\]
+**	vstrw.32	q[0-7], \[r0, #-4\]
 **	...
 */
 int32_t *off_store32_0 (int32_t * a, int32x4_t v)
@@ -151,7 +151,7 @@  int32_t *off_store32_0 (int32_t * a, int32x4_t v)
 /*
 **off_store32_1:
 **	...
-**	vstrw.32	q0, \[r0, #508\]
+**	vstrw.32	q[0-7], \[r0, #508\]
 **	...
 */
 uint32_t *off_store32_1 (uint32_t * a, uint32x4_t v)
@@ -163,7 +163,7 @@  uint32_t *off_store32_1 (uint32_t * a, uint32x4_t v)
 /*
 **pre_store8_0:
 **	...
-**	vstrb.8	q[0-9]+, \[r0, #-16\]!
+**	vstrb.8	q[0-7]+, \[r0, #-16\]!
 **	...
 */
 uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
@@ -176,7 +176,7 @@  uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
 /*
 **pre_store8_1:
 **	...
-**	vstrb.16	q[0-9]+, \[r0, #4\]!
+**	vstrb.16	q[0-7]+, \[r0, #4\]!
 **	...
 */
 int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
@@ -189,7 +189,7 @@  int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
 /*
 **pre_storefp16_0:
 **	...
-**	vstrh.16	q0, \[r0, #8\]!
+**	vstrh.16	q[0-7], \[r0, #8\]!
 **	...
 */
 float16_t *pre_storefp16_0 (float16_t *a, float16x8_t v)
@@ -202,7 +202,7 @@  float16_t *pre_storefp16_0 (float16_t *a, float16x8_t v)
 /*
 **pre_store16_0:
 **	...
-**	vstrh.16	q[0-9]+, \[r0, #254\]!
+**	vstrh.16	q[0-7]+, \[r0, #254\]!
 **	...
 */
 uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t v)
@@ -215,7 +215,7 @@  uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t v)
 /*
 **pre_store16_1:
 **	...
-**	vstrh.32	q[0-9]+, \[r0, #-52\]!
+**	vstrh.32	q[0-7]+, \[r0, #-52\]!
 **	...
 */
 int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
@@ -228,7 +228,7 @@  int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
 /*
 **pre_storefp32_0:
 **	...
-**	vstrw.32	q0, \[r0, #-4\]!
+**	vstrw.32	q[0-7], \[r0, #-4\]!
 **	...
 */
 float32_t *pre_storefp32_0 (float32_t *a, float32x4_t v)
@@ -241,7 +241,7 @@  float32_t *pre_storefp32_0 (float32_t *a, float32x4_t v)
 /*
 **pre_store32_0:
 **	...
-**	vstrw.32	q[0-9]+, \[r0, #4\]!
+**	vstrw.32	q[0-7]+, \[r0, #4\]!
 **	...
 */
 int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
@@ -255,7 +255,7 @@  int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
 /*
 **post_store8_0:
 **	...
-**	vstrb.8	q[0-9]+, \[r0\], #-26
+**	vstrb.8	q[0-7]+, \[r0\], #-26
 **	...
 */
 int8_t* post_store8_0 (int8_t * a, int8x16_t v)
@@ -268,7 +268,7 @@  int8_t* post_store8_0 (int8_t * a, int8x16_t v)
 /*
 **post_store8_1:
 **	...
-**	vstrb.16	q[0-9]+, \[r0\], #1
+**	vstrb.16	q[0-7]+, \[r0\], #1
 **	...
 */
 uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
@@ -281,7 +281,7 @@  uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
 /*
 **post_store8_2:
 **	...
-**	vstrb.8	q[0-9]+, \[r0\], #-26
+**	vstrb.8	q[0-7]+, \[r0\], #-26
 **	...
 */
 int8_t* post_store8_2 (int8_t * a, int8x16_t v)
@@ -294,7 +294,7 @@  int8_t* post_store8_2 (int8_t * a, int8x16_t v)
 /*
 **post_store8_3:
 **	...
-**	vstrb.16	q[0-9]+, \[r0\], #7
+**	vstrb.16	q[0-7]+, \[r0\], #7
 **	...
 */
 uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
@@ -307,7 +307,7 @@  uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
 /*
 **post_storefp16_0:
 **	...
-**	vstrh.16	q[0-9]+, \[r0\], #-16
+**	vstrh.16	q[0-7]+, \[r0\], #-16
 **	...
 */
 float16_t *post_storefp16_0 (float16_t *a, float16x8_t v)
@@ -320,7 +320,7 @@  float16_t *post_storefp16_0 (float16_t *a, float16x8_t v)
 /*
 **post_store16_0:
 **	...
-**	vstrh.16	q[0-9]+, \[r0\], #126
+**	vstrh.16	q[0-7]+, \[r0\], #126
 **	...
 */
 int16_t* post_store16_0 (int16_t * a, int16x8_t v)
@@ -333,7 +333,7 @@  int16_t* post_store16_0 (int16_t * a, int16x8_t v)
 /*
 **post_store16_1:
 **	...
-**	vstrh.32	q[0-9]+, \[r0\], #-16
+**	vstrh.32	q[0-7]+, \[r0\], #-16
 **	...
 */
 uint16_t* post_store16_1 (uint16_t * a, uint32x4_t v)
@@ -346,7 +346,7 @@  uint16_t* post_store16_1 (uint16_t * a, uint32x4_t v)
 /*
 **post_storefp32_0:
 **	...
-**	vstrw.32	q[0-9]+, \[r0\], #-16
+**	vstrw.32	q[0-7]+, \[r0\], #-16
 **	...
 */
 float32_t* post_storefp32_0 (float32_t * a, float32x4_t v)
@@ -359,7 +359,7 @@  float32_t* post_storefp32_0 (float32_t * a, float32x4_t v)
 /*
 **post_store32_0:
 **	...
-**	vstrw.32	q[0-9]+, \[r0\], #16
+**	vstrw.32	q[0-7]+, \[r0\], #16
 **	...
 */
 int32_t* post_store32_0 (int32_t * a, int32x4_t v)