@@ -18223,12 +18223,6 @@ aarch64_override_options_internal (struct gcc_options *opts)
SET_OPTION_IF_UNSET (opts, &global_options_set,
param_sched_autopref_queue_depth, queue_depth);
- /* If using Advanced SIMD only for autovectorization disable SVE vector costs
- comparison. */
- if (aarch64_autovec_preference == 1)
- SET_OPTION_IF_UNSET (opts, &global_options_set,
- aarch64_sve_compare_costs, 0);
-
/* Set up parameters to be used in prefetching algorithm. Do not
override the defaults unless we are tuning for a core we have
researched values for. */
@@ -22138,12 +22132,7 @@ aarch64_autovectorize_vector_modes (vector_modes *modes, bool)
modes->safe_push (sve_modes[sve_i++]);
unsigned int flags = 0;
- /* Consider enabling VECT_COMPARE_COSTS for SVE, both so that we
- can compare SVE against Advanced SIMD and so that we can compare
- multiple SVE vectorization approaches against each other. There's
- not really any point doing this for Advanced SIMD only, since the
- first mode that works should always be the best. */
- if (TARGET_SVE && aarch64_sve_compare_costs)
+ if (aarch64_vect_compare_costs)
flags |= VECT_COMPARE_COSTS;
return flags;
}
@@ -332,9 +332,10 @@ moutline-atomics
Target Var(aarch64_flag_outline_atomics) Init(2) Save
Generate local calls to out-of-line atomic operations.
--param=aarch64-sve-compare-costs=
-Target Joined UInteger Var(aarch64_sve_compare_costs) Init(1) IntegerRange(0, 1) Param
-When vectorizing for SVE, consider using unpacked vectors for smaller elements and use the cost model to pick the cheapest approach. Also use the cost model to choose between SVE and Advanced SIMD vectorization.
+-param=aarch64-vect-compare-costs=
+Target Joined UInteger Var(aarch64_vect_compare_costs) Init(1) IntegerRange(0, 1) Param
+When vectorizing, consider using multiple different approaches and use
+the cost model to choose the cheapest one.
-param=aarch64-float-recp-precision=
Target Joined UInteger Var(aarch64_float_recp_precision) Init(1) IntegerRange(1, 5) Param
@@ -16778,14 +16778,23 @@ With @option{--param=openacc-privatization=noisy}, do diagnose.
The following choices of @var{name} are available on AArch64 targets:
@table @gcctabopt
-@item aarch64-sve-compare-costs
-When vectorizing for SVE, consider using ``unpacked'' vectors for
-smaller elements and use the cost model to pick the cheapest approach.
-Also use the cost model to choose between SVE and Advanced SIMD vectorization.
-
-Using unpacked vectors includes storing smaller elements in larger
-containers and accessing elements with extending loads and truncating
-stores.
+@item aarch64-vect-compare-costs
+When vectorizing, consider using multiple different approaches and use
+the cost model to choose the cheapest one. This includes:
+
+@itemize
+@item
+Trying both SVE and Advanced SIMD, when SVE is available.
+
+@item
+Trying to use 64-bit Advanced SIMD vectors for the smallest data elements,
+rather than using 128-bit vectors for everything.
+
+@item
+Trying to use ``unpacked'' SVE vectors for smaller elements. This includes
+storing smaller elements in larger containers and accessing elements with
+extending loads and truncating stores.
+@end itemize
@item aarch64-float-recp-precision
The number of Newton iterations for calculating the reciprocal for float type.
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-options "-O3" } */
+
+#pragma GCC target "+nosve"
+
+int test(unsigned array[4][4]);
+
+int foo(unsigned short *a, unsigned long n)
+{
+ unsigned array[4][4];
+
+ for (unsigned i = 0; i < 4; i++, a += n)
+ {
+ array[i][0] = a[0] << 6;
+ array[i][1] = a[1] << 6;
+ array[i][2] = a[2] << 6;
+ array[i][3] = a[3] << 6;
+ }
+
+ return test(array);
+}
+
+/* { dg-final { scan-assembler-times {\tushll\t} 4 } } */
+/* { dg-final { scan-assembler-not {\tzip.\t} } } */
+/* { dg-final { scan-assembler-not {\tins\t} } } */
+/* { dg-final { scan-assembler-not {\tshl\t} } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-vect-compare-costs=0" } */
#include "cond_arith_1.c"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-vect-compare-costs=0" } */
#include "cond_arith_3.c"
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-vect-compare-costs=0" } */
#define INDEX16 uint16_t
#define INDEX32 uint32_t
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -save-temps --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -save-temps -msve-vector-bits=256 --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -save-temps -msve-vector-bits=256 --param aarch64-vect-compare-costs=0" } */
#include "load_const_offset_2.c"
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
#include "mask_struct_store_1.c"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
#include "mask_struct_store_2.c"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-vect-compare-costs=0" } */
double
f (double *restrict a, double *restrict b, int *lookup)
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-vect-compare-costs=0" } */
#define INDEX16 uint16_t
#define INDEX32 uint32_t
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-vect-compare-costs=0" } */
#include "unpack_unsigned_1.c"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --param aarch64-vect-compare-costs=0" } */
#include <stdint.h>
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --param aarch64-sve-compare-costs=0" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --param aarch64-vect-compare-costs=0" } */
#include "vcond_11.c"