[6/6] arm: [MVE intrinsics] rework vcmlaq

Message ID 20230713102224.1161596-6-christophe.lyon@linaro.org
State Unresolved
Headers
Series [1/6] arm: [MVE intrinsics] Factorize vcaddq vhcaddq |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Christophe Lyon July 13, 2023, 10:22 a.m. UTC
  Implement vcmlaq using the new MVE builtins framework.

2023-07-13  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vcmlaq, vcmlaq_rot90)
	(vcmlaq_rot180, vcmlaq_rot270): New.
	* config/arm/arm-mve-builtins-base.def (vcmlaq, vcmlaq_rot90)
	(vcmlaq_rot180, vcmlaq_rot270): New.
	* config/arm/arm-mve-builtins-base.h (vcmlaq, vcmlaq_rot90)
	(vcmlaq_rot180, vcmlaq_rot270): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vcmlaq,
	vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270.
	* config/arm/arm_mve.h (vcmlaq): Delete.
	(vcmlaq_rot180): Delete.
	(vcmlaq_rot270): Delete.
	(vcmlaq_rot90): Delete.
	(vcmlaq_m): Delete.
	(vcmlaq_rot180_m): Delete.
	(vcmlaq_rot270_m): Delete.
	(vcmlaq_rot90_m): Delete.
	(vcmlaq_f16): Delete.
	(vcmlaq_rot180_f16): Delete.
	(vcmlaq_rot270_f16): Delete.
	(vcmlaq_rot90_f16): Delete.
	(vcmlaq_f32): Delete.
	(vcmlaq_rot180_f32): Delete.
	(vcmlaq_rot270_f32): Delete.
	(vcmlaq_rot90_f32): Delete.
	(vcmlaq_m_f32): Delete.
	(vcmlaq_m_f16): Delete.
	(vcmlaq_rot180_m_f32): Delete.
	(vcmlaq_rot180_m_f16): Delete.
	(vcmlaq_rot270_m_f32): Delete.
	(vcmlaq_rot270_m_f16): Delete.
	(vcmlaq_rot90_m_f32): Delete.
	(vcmlaq_rot90_m_f16): Delete.
	(__arm_vcmlaq_f16): Delete.
	(__arm_vcmlaq_rot180_f16): Delete.
	(__arm_vcmlaq_rot270_f16): Delete.
	(__arm_vcmlaq_rot90_f16): Delete.
	(__arm_vcmlaq_f32): Delete.
	(__arm_vcmlaq_rot180_f32): Delete.
	(__arm_vcmlaq_rot270_f32): Delete.
	(__arm_vcmlaq_rot90_f32): Delete.
	(__arm_vcmlaq_m_f32): Delete.
	(__arm_vcmlaq_m_f16): Delete.
	(__arm_vcmlaq_rot180_m_f32): Delete.
	(__arm_vcmlaq_rot180_m_f16): Delete.
	(__arm_vcmlaq_rot270_m_f32): Delete.
	(__arm_vcmlaq_rot270_m_f16): Delete.
	(__arm_vcmlaq_rot90_m_f32): Delete.
	(__arm_vcmlaq_rot90_m_f16): Delete.
	(__arm_vcmlaq): Delete.
	(__arm_vcmlaq_rot180): Delete.
	(__arm_vcmlaq_rot270): Delete.
	(__arm_vcmlaq_rot90): Delete.
	(__arm_vcmlaq_m): Delete.
	(__arm_vcmlaq_rot180_m): Delete.
	(__arm_vcmlaq_rot270_m): Delete.
	(__arm_vcmlaq_rot90_m): Delete.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   4 +
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |  16 +-
 gcc/config/arm/arm-mve-builtins.cc       |   4 +
 gcc/config/arm/arm_mve.h                 | 304 -----------------------
 5 files changed, 22 insertions(+), 310 deletions(-)
  

Patch

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 3ad8df304e8..e31095ae112 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -262,6 +262,10 @@  FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
 FUNCTION_ONLY_N (vbrsrq, VBRSRQ)
 FUNCTION (vcaddq_rot90, unspec_mve_function_exact_insn_rot, (UNSPEC_VCADD90, UNSPEC_VCADD90, UNSPEC_VCADD90, VCADDQ_ROT90_M_S, VCADDQ_ROT90_M_U, VCADDQ_ROT90_M_F))
 FUNCTION (vcaddq_rot270, unspec_mve_function_exact_insn_rot, (UNSPEC_VCADD270, UNSPEC_VCADD270, UNSPEC_VCADD270, VCADDQ_ROT270_M_S, VCADDQ_ROT270_M_U, VCADDQ_ROT270_M_F))
+FUNCTION (vcmlaq, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMLA, -1, -1, VCMLAQ_M_F))
+FUNCTION (vcmlaq_rot90, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMLA90, -1, -1, VCMLAQ_ROT90_M_F))
+FUNCTION (vcmlaq_rot180, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMLA180, -1, -1, VCMLAQ_ROT180_M_F))
+FUNCTION (vcmlaq_rot270, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMLA270, -1, -1, VCMLAQ_ROT270_M_F))
 FUNCTION (vcmulq, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMUL, -1, -1, VCMULQ_M_F))
 FUNCTION (vcmulq_rot90, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMUL90, -1, -1, VCMULQ_ROT90_M_F))
 FUNCTION (vcmulq_rot180, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMUL180, -1, -1, VCMULQ_ROT180_M_F))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index cbcf0d296cd..e7d466f2efd 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -158,6 +158,10 @@  DEF_MVE_FUNCTION (vandq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vbrsrq, binary_imm32, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcaddq_rot90, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcaddq_rot270, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vcmlaq, ternary, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmlaq_rot90, ternary, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmlaq_rot180, ternary, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmlaq_rot270, ternary, all_float, m_or_none)
 DEF_MVE_FUNCTION (vcmulq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcmulq_rot90, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcmulq_rot180, binary, all_float, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 875b333ebef..be3698b4f4c 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -33,14 +33,14 @@  extern const function_base *const vaddvaq;
 extern const function_base *const vaddvq;
 extern const function_base *const vandq;
 extern const function_base *const vbrsrq;
-extern const function_base *const vcaddq_rot90;
 extern const function_base *const vcaddq_rot270;
-extern const function_base *const vcmulq;
-extern const function_base *const vcmulq_rot90;
-extern const function_base *const vcmulq_rot180;
-extern const function_base *const vcmulq_rot270;
+extern const function_base *const vcaddq_rot90;
 extern const function_base *const vclsq;
 extern const function_base *const vclzq;
+extern const function_base *const vcmlaq;
+extern const function_base *const vcmlaq_rot180;
+extern const function_base *const vcmlaq_rot270;
+extern const function_base *const vcmlaq_rot90;
 extern const function_base *const vcmpcsq;
 extern const function_base *const vcmpeqq;
 extern const function_base *const vcmpgeq;
@@ -49,6 +49,10 @@  extern const function_base *const vcmphiq;
 extern const function_base *const vcmpleq;
 extern const function_base *const vcmpltq;
 extern const function_base *const vcmpneq;
+extern const function_base *const vcmulq;
+extern const function_base *const vcmulq_rot180;
+extern const function_base *const vcmulq_rot270;
+extern const function_base *const vcmulq_rot90;
 extern const function_base *const vcreateq;
 extern const function_base *const vdupq;
 extern const function_base *const veorq;
@@ -56,8 +60,8 @@  extern const function_base *const vfmaq;
 extern const function_base *const vfmasq;
 extern const function_base *const vfmsq;
 extern const function_base *const vhaddq;
-extern const function_base *const vhcaddq_rot90;
 extern const function_base *const vhcaddq_rot270;
+extern const function_base *const vhcaddq_rot90;
 extern const function_base *const vhsubq;
 extern const function_base *const vmaxaq;
 extern const function_base *const vmaxavq;
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 7033e41a571..3272ece6326 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -670,6 +670,10 @@  function_instance::has_inactive_argument () const
     return false;
 
   if (mode_suffix_id == MODE_r
+      || base == functions::vcmlaq
+      || base == functions::vcmlaq_rot90
+      || base == functions::vcmlaq_rot180
+      || base == functions::vcmlaq_rot270
       || base == functions::vcmpeqq
       || base == functions::vcmpneq
       || base == functions::vcmpgeq
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index b9d3a876369..88b2e77ffd9 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -159,18 +159,10 @@ 
 #define vcvtq_m(__inactive, __a, __p) __arm_vcvtq_m(__inactive, __a, __p)
 #define vcvtbq_m(__a, __b, __p) __arm_vcvtbq_m(__a, __b, __p)
 #define vcvttq_m(__a, __b, __p) __arm_vcvttq_m(__a, __b, __p)
-#define vcmlaq(__a, __b, __c) __arm_vcmlaq(__a, __b, __c)
-#define vcmlaq_rot180(__a, __b, __c) __arm_vcmlaq_rot180(__a, __b, __c)
-#define vcmlaq_rot270(__a, __b, __c) __arm_vcmlaq_rot270(__a, __b, __c)
-#define vcmlaq_rot90(__a, __b, __c) __arm_vcmlaq_rot90(__a, __b, __c)
 #define vcvtmq_m(__inactive, __a, __p) __arm_vcvtmq_m(__inactive, __a, __p)
 #define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p)
 #define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p)
 #define vcvtq_m_n(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n(__inactive, __a, __imm6, __p)
-#define vcmlaq_m(__a, __b, __c, __p) __arm_vcmlaq_m(__a, __b, __c, __p)
-#define vcmlaq_rot180_m(__a, __b, __c, __p) __arm_vcmlaq_rot180_m(__a, __b, __c, __p)
-#define vcmlaq_rot270_m(__a, __b, __c, __p) __arm_vcmlaq_rot270_m(__a, __b, __c, __p)
-#define vcmlaq_rot90_m(__a, __b, __c, __p) __arm_vcmlaq_rot90_m(__a, __b, __c, __p)
 #define vcvtq_x(__a, __p) __arm_vcvtq_x(__a, __p)
 #define vcvtq_x_n(__a, __imm6, __p) __arm_vcvtq_x_n(__a, __imm6, __p)
 
@@ -286,10 +278,6 @@ 
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
 #define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p)
-#define vcmlaq_f16(__a, __b, __c) __arm_vcmlaq_f16(__a, __b, __c)
-#define vcmlaq_rot180_f16(__a, __b, __c) __arm_vcmlaq_rot180_f16(__a, __b, __c)
-#define vcmlaq_rot270_f16(__a, __b, __c) __arm_vcmlaq_rot270_f16(__a, __b, __c)
-#define vcmlaq_rot90_f16(__a, __b, __c) __arm_vcmlaq_rot90_f16(__a, __b, __c)
 #define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p)
 #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
 #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
@@ -298,10 +286,6 @@ 
 #define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
 #define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p)
 #define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p)
-#define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c)
-#define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c)
-#define vcmlaq_rot270_f32(__a, __b, __c) __arm_vcmlaq_rot270_f32(__a, __b, __c)
-#define vcmlaq_rot90_f32(__a, __b, __c) __arm_vcmlaq_rot90_f32(__a, __b, __c)
 #define vcvtmq_m_s32_f32(__inactive, __a, __p) __arm_vcvtmq_m_s32_f32(__inactive, __a, __p)
 #define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p)
 #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p)
@@ -344,14 +328,6 @@ 
 #define vmulltq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p16(__inactive, __a, __b, __p)
 #define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p)
 #define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p)
-#define vcmlaq_m_f32(__a, __b, __c, __p) __arm_vcmlaq_m_f32(__a, __b, __c, __p)
-#define vcmlaq_m_f16(__a, __b, __c, __p) __arm_vcmlaq_m_f16(__a, __b, __c, __p)
-#define vcmlaq_rot180_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot180_m_f32(__a, __b, __c, __p)
-#define vcmlaq_rot180_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot180_m_f16(__a, __b, __c, __p)
-#define vcmlaq_rot270_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot270_m_f32(__a, __b, __c, __p)
-#define vcmlaq_rot270_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot270_m_f16(__a, __b, __c, __p)
-#define vcmlaq_rot90_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot90_m_f32(__a, __b, __c, __p)
-#define vcmlaq_rot90_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot90_m_f16(__a, __b, __c, __p)
 #define vcvtq_m_n_s32_f32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_s32_f32(__inactive, __a,  __imm6, __p)
 #define vcvtq_m_n_s16_f16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_s16_f16(__inactive, __a,  __imm6, __p)
 #define vcvtq_m_n_u32_f32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_u32_f32(__inactive, __a,  __imm6, __p)
@@ -4645,34 +4621,6 @@  __arm_vcvttq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __
   return __builtin_mve_vcvttq_m_f32_f16v4sf (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
-  return __builtin_mve_vcmlaqv8hf (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot180_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
-  return __builtin_mve_vcmlaq_rot180v8hf (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot270_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
-  return __builtin_mve_vcmlaq_rot270v8hf (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot90_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
-  return __builtin_mve_vcmlaq_rot90v8hf (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -4729,34 +4677,6 @@  __arm_vcvtq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vcvtq_m_from_f_uv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
-  return __builtin_mve_vcmlaqv4sf (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot180_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
-  return __builtin_mve_vcmlaq_rot180v4sf (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot270_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
-  return __builtin_mve_vcmlaq_rot270v4sf (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot90_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
-  return __builtin_mve_vcmlaq_rot90v4sf (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -4855,62 +4775,6 @@  __arm_vbicq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve
   return __builtin_mve_vbicq_m_fv8hf (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmlaq_m_fv4sf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmlaq_m_fv8hf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot180_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmlaq_rot180_m_fv4sf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot180_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmlaq_rot180_m_fv8hf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot270_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmlaq_rot270_m_fv4sf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot270_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmlaq_rot270_m_fv8hf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot90_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmlaq_rot90_m_fv4sf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot90_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmlaq_rot90_m_fv8hf (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtq_m_n_s32_f32 (int32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p)
@@ -8481,34 +8345,6 @@  __arm_vcvttq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
  return __arm_vcvttq_m_f32_f16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
- return __arm_vcmlaq_f16 (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot180 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
- return __arm_vcmlaq_rot180_f16 (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot270 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
- return __arm_vcmlaq_rot270_f16 (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot90 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
- return __arm_vcmlaq_rot90_f16 (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -8565,34 +8401,6 @@  __arm_vcvtq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
  return __arm_vcvtq_m_u16_f16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
- return __arm_vcmlaq_f32 (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot180 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
- return __arm_vcmlaq_rot180_f32 (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot270 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
- return __arm_vcmlaq_rot270_f32 (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot90 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
- return __arm_vcmlaq_rot90_f32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -8691,62 +8499,6 @@  __arm_vbicq_m (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pre
  return __arm_vbicq_m_f16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vcmlaq_m_f32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vcmlaq_m_f16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot180_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vcmlaq_rot180_m_f32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot180_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vcmlaq_rot180_m_f16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot270_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vcmlaq_rot270_m_f32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot270_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vcmlaq_rot270_m_f16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot90_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vcmlaq_rot90_m_f32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmlaq_rot90_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vcmlaq_rot90_m_f16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtq_m_n (int32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p)
@@ -9620,34 +9372,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcvtq_m_n_f16_u16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcvtq_m_n_f32_u32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
 
-#define __arm_vcmlaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));})
-
-#define __arm_vcmlaq_rot180(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot180_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot180_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));})
-
-#define __arm_vcmlaq_rot270(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot270_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot270_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));})
-
-#define __arm_vcmlaq_rot90(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot90_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot90_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));})
-
 #define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -9697,34 +9421,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
 
-#define __arm_vcmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
-#define __arm_vcmlaq_rot180_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot180_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot180_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
-#define __arm_vcmlaq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot270_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot270_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
-#define __arm_vcmlaq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot90_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot90_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
 #define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \