new file mode 100644
@@ -0,0 +1,201 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP) \
+ TYPE __attribute__ ((noipa)) \
+ cond_##NAME (TYPE *__restrict a, int *__restrict cond1, \
+ int *__restrict cond2, TYPE init, int n) \
+ { \
+ TYPE result = init; \
+ for (int i = 0; i < n; i++) \
+ if (cond1[i] > cond2[i]) \
+ result OP a[i]; \
+ return result; \
+ }
+
+COND_REDUC (reduc_sum_char, char, +=)
+COND_REDUC (reduc_sum_short, short, +=)
+COND_REDUC (reduc_sum_int, int, +=)
+COND_REDUC (reduc_sum_long, long, +=)
+COND_REDUC (reduc_and_char, char, &=)
+COND_REDUC (reduc_and_short, short, &=)
+COND_REDUC (reduc_and_int, int, &=)
+COND_REDUC (reduc_and_long, long, &=)
+COND_REDUC (reduc_ior_char, char, |=)
+COND_REDUC (reduc_ior_short, short, |=)
+COND_REDUC (reduc_ior_int, int, |=)
+COND_REDUC (reduc_ior_long, long, |=)
+COND_REDUC (reduc_xor_char, char, ^=)
+COND_REDUC (reduc_xor_short, short, ^=)
+COND_REDUC (reduc_xor_int, int, ^=)
+COND_REDUC (reduc_xor_long, long, ^=)
+
+int
+main (void)
+{
+ check_vect ();
+ int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27};
+ int cond2[N] = {15, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 11, 12, 10,
+ 21, 22, 23, 24, 25, 26, 27, 1, 2, 3, 4, 13, 14};
+
+ char a_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short a_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int a_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long a_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char init_char = 7;
+ short init_short = 77;
+ int init_int = 777;
+ long init_long = 777;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP) \
+ for (int i = 0; i < N; i++) \
+ if (cond1[i] > cond2[i]) \
+ result_gold_##NAME OP a_##TYPE[i];
+
+ char result_reduc_sum_char
+ = cond_reduc_sum_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_sum_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_char, char, +=)
+ if (result_reduc_sum_char != result_gold_reduc_sum_char)
+ __builtin_abort ();
+
+ short result_reduc_sum_short
+ = cond_reduc_sum_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_sum_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_short, short, +=)
+ if (result_reduc_sum_short != result_gold_reduc_sum_short)
+ __builtin_abort ();
+
+ int result_reduc_sum_int
+ = cond_reduc_sum_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_sum_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_int, int, +=)
+ if (result_reduc_sum_int != result_gold_reduc_sum_int)
+ __builtin_abort ();
+
+ long result_reduc_sum_long
+ = cond_reduc_sum_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_sum_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_long, long, +=)
+ if (result_reduc_sum_long != result_gold_reduc_sum_long)
+ __builtin_abort ();
+
+ char result_reduc_and_char
+ = cond_reduc_and_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_and_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_char, char, &=)
+ if (result_reduc_and_char != result_gold_reduc_and_char)
+ __builtin_abort ();
+
+ short result_reduc_and_short
+ = cond_reduc_and_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_and_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_short, short, &=)
+ if (result_reduc_and_short != result_gold_reduc_and_short)
+ __builtin_abort ();
+
+ int result_reduc_and_int
+ = cond_reduc_and_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_and_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_int, int, &=)
+ if (result_reduc_and_int != result_gold_reduc_and_int)
+ __builtin_abort ();
+
+ long result_reduc_and_long
+ = cond_reduc_and_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_and_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_long, long, &=)
+ if (result_reduc_and_long != result_gold_reduc_and_long)
+ __builtin_abort ();
+
+ char result_reduc_ior_char
+ = cond_reduc_ior_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_ior_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_char, char, |=)
+ if (result_reduc_ior_char != result_gold_reduc_ior_char)
+ __builtin_abort ();
+
+ short result_reduc_ior_short
+ = cond_reduc_ior_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_ior_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_short, short, |=)
+ if (result_reduc_ior_short != result_gold_reduc_ior_short)
+ __builtin_abort ();
+
+ int result_reduc_ior_int
+ = cond_reduc_ior_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_ior_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_int, int, |=)
+ if (result_reduc_ior_int != result_gold_reduc_ior_int)
+ __builtin_abort ();
+
+ long result_reduc_ior_long
+ = cond_reduc_ior_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_ior_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_long, long, |=)
+ if (result_reduc_ior_long != result_gold_reduc_ior_long)
+ __builtin_abort ();
+
+ char result_reduc_xor_char
+ = cond_reduc_xor_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_xor_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_char, char, ^=)
+ if (result_reduc_xor_char != result_gold_reduc_xor_char)
+ __builtin_abort ();
+
+ short result_reduc_xor_short
+ = cond_reduc_xor_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_xor_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_short, short, ^=)
+ if (result_reduc_xor_short != result_gold_reduc_xor_short)
+ __builtin_abort ();
+
+ int result_reduc_xor_int
+ = cond_reduc_xor_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_xor_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_int, int, ^=)
+ if (result_reduc_xor_int != result_gold_reduc_xor_int)
+ __builtin_abort ();
+
+ long result_reduc_xor_long
+ = cond_reduc_xor_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_xor_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_long, long, ^=)
+ if (result_reduc_xor_long != result_gold_reduc_xor_long)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 16 "vect" } } */
new file mode 100644
@@ -0,0 +1,202 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-fwrapv -fno-trapv" } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP1, OP2) \
+ TYPE __attribute__ ((noipa)) \
+ cond_##NAME (TYPE *__restrict a, int *__restrict cond1, \
+ int *__restrict cond2, TYPE init, int n) \
+ { \
+ TYPE result = init; \
+ for (int i = 0; i < n; i++) \
+ if (cond1[i] > cond2[i]) \
+ result OP1 a[i] OP2 init; \
+ return result; \
+ }
+
+COND_REDUC (reduc_sum_char, char, +=, +)
+COND_REDUC (reduc_sum_short, short, +=, +)
+COND_REDUC (reduc_sum_int, int, +=, +)
+COND_REDUC (reduc_sum_long, long, +=, +)
+COND_REDUC (reduc_and_char, char, &=, &)
+COND_REDUC (reduc_and_short, short, &=, &)
+COND_REDUC (reduc_and_int, int, &=, &)
+COND_REDUC (reduc_and_long, long, &=, &)
+COND_REDUC (reduc_ior_char, char, |=, |)
+COND_REDUC (reduc_ior_short, short, |=, |)
+COND_REDUC (reduc_ior_int, int, |=, |)
+COND_REDUC (reduc_ior_long, long, |=, |)
+COND_REDUC (reduc_xor_char, char, ^=, ^)
+COND_REDUC (reduc_xor_short, short, ^=, ^)
+COND_REDUC (reduc_xor_int, int, ^=, ^)
+COND_REDUC (reduc_xor_long, long, ^=, ^)
+
+int
+main (void)
+{
+ check_vect ();
+ int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27};
+ int cond2[N] = {15, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 11, 12, 10,
+ 21, 22, 23, 24, 25, 26, 27, 1, 2, 3, 4, 13, 14};
+
+ char a_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short a_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int a_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long a_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char init_char = 7;
+ short init_short = 77;
+ int init_int = 777;
+ long init_long = 777;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP1, OP2) \
+ for (int i = 0; i < N; i++) \
+ if (cond1[i] > cond2[i]) \
+ result_gold_##NAME OP1 a_##TYPE[i] OP2 init_##TYPE;
+
+ char result_reduc_sum_char
+ = cond_reduc_sum_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_sum_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_char, char, +=, +)
+ if (result_reduc_sum_char != result_gold_reduc_sum_char)
+ __builtin_abort ();
+
+ short result_reduc_sum_short
+ = cond_reduc_sum_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_sum_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_short, short, +=, +)
+ if (result_reduc_sum_short != result_gold_reduc_sum_short)
+ __builtin_abort ();
+
+ int result_reduc_sum_int
+ = cond_reduc_sum_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_sum_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_int, int, +=, +)
+ if (result_reduc_sum_int != result_gold_reduc_sum_int)
+ __builtin_abort ();
+
+ long result_reduc_sum_long
+ = cond_reduc_sum_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_sum_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_long, long, +=, +)
+ if (result_reduc_sum_long != result_gold_reduc_sum_long)
+ __builtin_abort ();
+
+ char result_reduc_and_char
+ = cond_reduc_and_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_and_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_char, char, &=, &)
+ if (result_reduc_and_char != result_gold_reduc_and_char)
+ __builtin_abort ();
+
+ short result_reduc_and_short
+ = cond_reduc_and_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_and_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_short, short, &=, &)
+ if (result_reduc_and_short != result_gold_reduc_and_short)
+ __builtin_abort ();
+
+ int result_reduc_and_int
+ = cond_reduc_and_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_and_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_int, int, &=, &)
+ if (result_reduc_and_int != result_gold_reduc_and_int)
+ __builtin_abort ();
+
+ long result_reduc_and_long
+ = cond_reduc_and_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_and_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_long, long, &=, &)
+ if (result_reduc_and_long != result_gold_reduc_and_long)
+ __builtin_abort ();
+
+ char result_reduc_ior_char
+ = cond_reduc_ior_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_ior_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_char, char, |=, |)
+ if (result_reduc_ior_char != result_gold_reduc_ior_char)
+ __builtin_abort ();
+
+ short result_reduc_ior_short
+ = cond_reduc_ior_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_ior_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_short, short, |=, |)
+ if (result_reduc_ior_short != result_gold_reduc_ior_short)
+ __builtin_abort ();
+
+ int result_reduc_ior_int
+ = cond_reduc_ior_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_ior_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_int, int, |=, |)
+ if (result_reduc_ior_int != result_gold_reduc_ior_int)
+ __builtin_abort ();
+
+ long result_reduc_ior_long
+ = cond_reduc_ior_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_ior_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_long, long, |=, |)
+ if (result_reduc_ior_long != result_gold_reduc_ior_long)
+ __builtin_abort ();
+
+ char result_reduc_xor_char
+ = cond_reduc_xor_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_xor_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_char, char, ^=, ^)
+ if (result_reduc_xor_char != result_gold_reduc_xor_char)
+ __builtin_abort ();
+
+ short result_reduc_xor_short
+ = cond_reduc_xor_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_xor_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_short, short, ^=, ^)
+ if (result_reduc_xor_short != result_gold_reduc_xor_short)
+ __builtin_abort ();
+
+ int result_reduc_xor_int
+ = cond_reduc_xor_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_xor_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_int, int, ^=, ^)
+ if (result_reduc_xor_int != result_gold_reduc_xor_int)
+ __builtin_abort ();
+
+ long result_reduc_xor_long
+ = cond_reduc_xor_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_xor_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_long, long, ^=, ^)
+ if (result_reduc_xor_long != result_gold_reduc_xor_long)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 16 "vect" } } */
new file mode 100644
@@ -0,0 +1,314 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-fwrapv -fno-trapv" } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP1, OP2) \
+ TYPE __attribute__ ((noipa)) \
+ cond_##NAME (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, \
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f, \
+ TYPE *__restrict g, int *__restrict cond1, \
+ int *__restrict cond2, TYPE init, int n) \
+ { \
+ TYPE result = init; \
+ for (int i = 0; i < n; i++) \
+ if (cond1[i] > cond2[i]) \
+ result OP1 a[i] OP2 init OP2 \
+ b[i] OP2 c[i] OP2 d[i] OP2 e[i] OP2 f[i] OP2 g[i]; \
+ return result; \
+ }
+
+COND_REDUC (reduc_sum_char, char, +=, +)
+COND_REDUC (reduc_sum_short, short, +=, +)
+COND_REDUC (reduc_sum_int, int, +=, +)
+COND_REDUC (reduc_sum_long, long, +=, +)
+COND_REDUC (reduc_and_char, char, &=, &)
+COND_REDUC (reduc_and_short, short, &=, &)
+COND_REDUC (reduc_and_int, int, &=, &)
+COND_REDUC (reduc_and_long, long, &=, &)
+COND_REDUC (reduc_ior_char, char, |=, |)
+COND_REDUC (reduc_ior_short, short, |=, |)
+COND_REDUC (reduc_ior_int, int, |=, |)
+COND_REDUC (reduc_ior_long, long, |=, |)
+COND_REDUC (reduc_xor_char, char, ^=, ^)
+COND_REDUC (reduc_xor_short, short, ^=, ^)
+COND_REDUC (reduc_xor_int, int, ^=, ^)
+COND_REDUC (reduc_xor_long, long, ^=, ^)
+
+int
+main (void)
+{
+ check_vect ();
+ int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27};
+ int cond2[N] = {15, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 11, 12, 10,
+ 21, 22, 23, 24, 25, 26, 27, 1, 2, 3, 4, 13, 14};
+
+ char a_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short a_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int a_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long a_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char b_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short b_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int b_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long b_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char c_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short c_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int c_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long c_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char d_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short d_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int d_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long d_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char e_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short e_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int e_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long e_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char f_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short f_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int f_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long f_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char g_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short g_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int g_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long g_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char init_char = 7;
+ short init_short = 77;
+ int init_int = 777;
+ long init_long = 777;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP1, OP2) \
+ for (int i = 0; i < N; i++) \
+ if (cond1[i] > cond2[i]) \
+ result_gold_##NAME OP1 a_##TYPE[i] OP2 init_##TYPE OP2 \
+ b_##TYPE[i] OP2 c_##TYPE[i] OP2 d_##TYPE[i] OP2 e_##TYPE[i] OP2 \
+ f_##TYPE[i] OP2 g_##TYPE[i];
+
+ char result_reduc_sum_char
+ = cond_reduc_sum_char (a_char, b_char, c_char, d_char, e_char, f_char,
+ g_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_sum_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_char, char, +=, +)
+ if (result_reduc_sum_char != result_gold_reduc_sum_char)
+ __builtin_abort ();
+
+ short result_reduc_sum_short
+ = cond_reduc_sum_short (a_short, b_short, c_short, d_short, e_short,
+ f_short, g_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_sum_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_short, short, +=, +)
+ if (result_reduc_sum_short != result_gold_reduc_sum_short)
+ __builtin_abort ();
+
+ int result_reduc_sum_int
+ = cond_reduc_sum_int (a_int, b_int, c_int, d_int, e_int, f_int, g_int,
+ cond1, cond2, init_int, N);
+ int result_gold_reduc_sum_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_int, int, +=, +)
+ if (result_reduc_sum_int != result_gold_reduc_sum_int)
+ __builtin_abort ();
+
+ long result_reduc_sum_long
+ = cond_reduc_sum_long (a_long, b_long, c_long, d_long, e_long, f_long,
+ g_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_sum_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_long, long, +=, +)
+ if (result_reduc_sum_long != result_gold_reduc_sum_long)
+ __builtin_abort ();
+
+ char result_reduc_and_char
+ = cond_reduc_and_char (a_char, b_char, c_char, d_char, e_char, f_char,
+ g_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_and_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_char, char, &=, &)
+ if (result_reduc_and_char != result_gold_reduc_and_char)
+ __builtin_abort ();
+
+ short result_reduc_and_short
+ = cond_reduc_and_short (a_short, b_short, c_short, d_short, e_short,
+ f_short, g_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_and_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_short, short, &=, &)
+ if (result_reduc_and_short != result_gold_reduc_and_short)
+ __builtin_abort ();
+
+ int result_reduc_and_int
+ = cond_reduc_and_int (a_int, b_int, c_int, d_int, e_int, f_int, g_int,
+ cond1, cond2, init_int, N);
+ int result_gold_reduc_and_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_int, int, &=, &)
+ if (result_reduc_and_int != result_gold_reduc_and_int)
+ __builtin_abort ();
+
+ long result_reduc_and_long
+ = cond_reduc_and_long (a_long, b_long, c_long, d_long, e_long, f_long,
+ g_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_and_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_and_long, long, &=, &)
+ if (result_reduc_and_long != result_gold_reduc_and_long)
+ __builtin_abort ();
+
+ char result_reduc_ior_char
+ = cond_reduc_ior_char (a_char, b_char, c_char, d_char, e_char, f_char,
+ g_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_ior_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_char, char, |=, |)
+ if (result_reduc_ior_char != result_gold_reduc_ior_char)
+ __builtin_abort ();
+
+ short result_reduc_ior_short
+ = cond_reduc_ior_short (a_short, b_short, c_short, d_short, e_short,
+ f_short, g_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_ior_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_short, short, |=, |)
+ if (result_reduc_ior_short != result_gold_reduc_ior_short)
+ __builtin_abort ();
+
+ int result_reduc_ior_int
+ = cond_reduc_ior_int (a_int, b_int, c_int, d_int, e_int, f_int, g_int,
+ cond1, cond2, init_int, N);
+ int result_gold_reduc_ior_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_int, int, |=, |)
+ if (result_reduc_ior_int != result_gold_reduc_ior_int)
+ __builtin_abort ();
+
+ long result_reduc_ior_long
+ = cond_reduc_ior_long (a_long, b_long, c_long, d_long, e_long, f_long,
+ g_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_ior_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_ior_long, long, |=, |)
+ if (result_reduc_ior_long != result_gold_reduc_ior_long)
+ __builtin_abort ();
+
+ char result_reduc_xor_char
+ = cond_reduc_xor_char (a_char, b_char, c_char, d_char, e_char, f_char,
+ g_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_xor_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_char, char, ^=, ^)
+ if (result_reduc_xor_char != result_gold_reduc_xor_char)
+ __builtin_abort ();
+
+ short result_reduc_xor_short
+ = cond_reduc_xor_short (a_short, b_short, c_short, d_short, e_short,
+ f_short, g_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_xor_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_short, short, ^=, ^)
+ if (result_reduc_xor_short != result_gold_reduc_xor_short)
+ __builtin_abort ();
+
+ int result_reduc_xor_int
+ = cond_reduc_xor_int (a_int, b_int, c_int, d_int, e_int, f_int, g_int,
+ cond1, cond2, init_int, N);
+ int result_gold_reduc_xor_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_int, int, ^=, ^)
+ if (result_reduc_xor_int != result_gold_reduc_xor_int)
+ __builtin_abort ();
+
+ long result_reduc_xor_long
+ = cond_reduc_xor_long (a_long, b_long, c_long, d_long, e_long, f_long,
+ g_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_xor_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_xor_long, long, ^=, ^)
+ if (result_reduc_xor_long != result_gold_reduc_xor_long)
+ __builtin_abort ();
+ return 0;
+}
+
+/* FIXME: The count should be 16 rather than 15, but the 'reduc_sum_char' case currently fails to vectorize. */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 15 "vect" } } */
new file mode 100644
@@ -0,0 +1,84 @@
+/* { dg-require-effective-target vect_float } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-ffast-math" } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP1, OP2) \
+ TYPE __attribute__ ((noipa)) \
+ cond_##NAME (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, \
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f, \
+ TYPE *__restrict g, int *__restrict cond1, \
+ int *__restrict cond2, TYPE init, int n) \
+ { \
+ TYPE result = init; \
+ for (int i = 0; i < n; i++) \
+ if (cond1[i] > cond2[i]) \
+ result OP1 a[i] OP2 init OP2 \
+ b[i] OP2 c[i] OP2 d[i] OP2 e[i] OP2 f[i] OP2 g[i]; \
+ return result; \
+ }
+
+COND_REDUC (reduc_sum_float, float, +=, +)
+
+int
+main (void)
+{
+ check_vect ();
+ int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27};
+ int cond2[N] = {15, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 11, 12, 10,
+ 21, 22, 23, 24, 25, 26, 27, 1, 2, 3, 4, 13, 14};
+
+ float a_float[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+
+ float b_float[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+
+ float c_float[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+
+ float d_float[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+
+ float e_float[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+
+ float f_float[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+
+ float g_float[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+
+ float init_float = 7;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP1, OP2) \
+ for (int i = 0; i < N; i++) \
+ if (cond1[i] > cond2[i]) \
+ result_gold_##NAME OP1 a_##TYPE[i] OP2 init_##TYPE OP2 \
+ b_##TYPE[i] OP2 c_##TYPE[i] OP2 d_##TYPE[i] OP2 e_##TYPE[i] OP2 \
+ f_##TYPE[i] OP2 g_##TYPE[i];
+
+ float result_reduc_sum_float
+ = cond_reduc_sum_float (a_float, b_float, c_float, d_float, e_float, f_float,
+ g_float, cond1, cond2, init_float, N);
+ float result_gold_reduc_sum_float = init_float;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_float, float, +=, +)
+ if (result_reduc_sum_float != result_gold_reduc_sum_float)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,96 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-fwrapv -fno-trapv" } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP1, OP2) \
+ TYPE __attribute__ ((noipa)) \
+ cond_##NAME (TYPE *__restrict a, int *__restrict cond1, \
+ int *__restrict cond2, TYPE init, int n) \
+ { \
+ TYPE result = init; \
+ for (int i = 0; i < n; i++) \
+ if (cond1[i] > cond2[i]) \
+ result OP1 a[i] OP2 init; \
+ return result; \
+ }
+
+COND_REDUC (reduc_sum_char, char, -=, +)
+COND_REDUC (reduc_sum_short, short, -=, +)
+COND_REDUC (reduc_sum_int, int, -=, +)
+COND_REDUC (reduc_sum_long, long, -=, +)
+
+
+int
+main (void)
+{
+ check_vect ();
+ int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27};
+ int cond2[N] = {15, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 11, 12, 10,
+ 21, 22, 23, 24, 25, 26, 27, 1, 2, 3, 4, 13, 14};
+
+ char a_char[N]
+ = {55, 66, 77, -88, 111, -9, 109, 37, 23, -101, 22, 37, -56, 67,
+ 70, 50, -95, 87, 73, -123, -59, 107, 91, 27, 72, 29, 58};
+ short a_short[N] = {551, 662, 773, -881, 1113, -97, 1094, 378, 237,
+ -1013, 224, 376, -562, 673, 705, 508, -956, 877,
+ 734, -1235, -590, 1071, 910, 270, 726, 298, 589};
+ int a_int[N]
+ = {5510, 6626, 7738, -8819, 11133, -974, 10947, 3789, 2373,
+ -10132, 2245, 3767, -5627, 6738, 7059, 5081, -9567, 8777,
+ 7345, -12350, -5909, 10710, 9104, 2704, 7263, 2987, 5898};
+ long a_long[N]
+ = {55106, 66266, 77387, -88198, 111339, -9740, 109475, 37890, 23730,
+ -101326, 22457, 37679, -56270, 67383, 70593, 50813, -95677, 87773,
+ 73457, -123501, -59091, 107101, 91049, 27049, 72639, 29877, 58987};
+
+ char init_char = 7;
+ short init_short = 77;
+ int init_int = 777;
+ long init_long = 777;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP1, OP2) \
+ for (int i = 0; i < N; i++) \
+ if (cond1[i] > cond2[i]) \
+ result_gold_##NAME OP1 a_##TYPE[i] OP2 init_##TYPE;
+
+ char result_reduc_sum_char
+ = cond_reduc_sum_char (a_char, cond1, cond2, init_char, N);
+ char result_gold_reduc_sum_char = init_char;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_char, char, -=, +)
+ if (result_reduc_sum_char != result_gold_reduc_sum_char)
+ __builtin_abort ();
+
+ short result_reduc_sum_short
+ = cond_reduc_sum_short (a_short, cond1, cond2, init_short, N);
+ short result_gold_reduc_sum_short = init_short;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_short, short, -=, +)
+ if (result_reduc_sum_short != result_gold_reduc_sum_short)
+ __builtin_abort ();
+
+ int result_reduc_sum_int
+ = cond_reduc_sum_int (a_int, cond1, cond2, init_int, N);
+ int result_gold_reduc_sum_int = init_int;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_int, int, -=, +)
+ if (result_reduc_sum_int != result_gold_reduc_sum_int)
+ __builtin_abort ();
+
+ long result_reduc_sum_long
+ = cond_reduc_sum_long (a_long, cond1, cond2, init_long, N);
+ long result_gold_reduc_sum_long = init_long;
+#pragma GCC novector
+ COND_REDUC_GOLDEN (reduc_sum_long, long, -=, +)
+ if (result_reduc_sum_long != result_gold_reduc_sum_long)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
@@ -1788,14 +1788,118 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
/* Make R_OP1 to hold reduction variable. */
+ gimple *reassociate_stmt = NULL;
if (r_nop2 == PHI_RESULT (header_phi)
&& commutative_tree_code (reduction_op))
{
std::swap (r_op1, r_op2);
std::swap (r_nop1, r_nop2);
}
- else if (r_nop1 != PHI_RESULT (header_phi))
- return false;
+ else if (r_nop1 == PHI_RESULT (header_phi))
+ ;
+ /* Analyze the statement chain of STMT so that we can generate a
+ better if-conversion code sequence. We are trying to catch the
+ following situation:
+
+ loop-header:
+ reduc_1 = PHI <0, reduc_2>
+
+ ...
+ if (...)
+ tmp1 = reduc_1 + rhs1;
+ tmp2 = tmp1 + rhs2;
+ tmp3 = tmp2 + rhs3;
+ ...
+ reduc_3 = tmpN-1 + rhsN-1;
+
+ reduc_2 = PHI <reduc_1, reduc_3>
+
+ and re-associate it to:
+
+ reduc_1 = PHI <0, reduc_2>
+
+ tmp1 = rhs1;
+ tmp2 = tmp1 + rhs2;
+ tmp3 = tmp2 + rhs3;
+ ...
+ reduc_3 = tmpN-1 + rhsN-1;
+
+ ifcvt = cond_expr ? reduc_3 : 0;
+ reduc_2 = reduc_1 +/- ifcvt; */
+ else
+ {
+ /* We only re-associate when the header PHI has exactly 2 uses.
+ One is a simple assign use (e.g. with PLUS_EXPR or MINUS_EXPR),
+ the other is the current PHI. That is:
+
+ reduc_1 = PHI <..., reduc_2> ---> Header PHI.
+ ...
+ if (...)
+ tmp1 = reduc_1 + rhs1; ---> First use.
+ ...
+ reduc_2 = PHI <reduc_1, reduc_3> ---> Last use.
+ ...
+
+ TODO: We can relax the check here in the future when we see there
+ are more cases to be optimized. */
+ if (num_imm_uses (PHI_RESULT (header_phi)) != 2
+ || EDGE_COUNT (gimple_bb (stmt)->succs) != 1)
+ return false;
+
+ /* For TYPE_OVERFLOW_UNDEFINED types the ops would have to be converted
+ to unsigned to avoid spurious undefined overflow; give up instead. */
+ if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (PHI_RESULT (phi))))
+ return false;
+
+ /* We should not re-associate a floating-point reduction when doing so
+ could introduce spurious exceptions. */
+ if (FLOAT_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
+ && (!flag_associative_math
+ || HONOR_SIGNED_ZEROS (TREE_TYPE (PHI_RESULT (phi)))
+ || HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (PHI_RESULT (phi)))
+ || HONOR_NANS (TREE_TYPE (PHI_RESULT (phi)))))
+ return false;
+
+ /* The first use should be the PHI that we are visiting. */
+ gimple *first_use_stmt = USE_STMT (
+ first_readonly_imm_use (&imm_iter, PHI_RESULT (header_phi)));
+ if (first_use_stmt != phi
+ /* This first use should be located in the fall-through block. */
+ || gimple_bb (first_use_stmt) == gimple_bb (stmt)
+ || !flow_bb_inside_loop_p (gimple_bb (stmt)->loop_father,
+ gimple_bb (first_use_stmt))
+ || !find_fallthru_edge (gimple_bb (stmt)->succs)
+ || gimple_bb (first_use_stmt)
+ != FALLTHRU_EDGE (gimple_bb (stmt))->dest)
+ return false;
+
+ /* The last use should be a simple assign STMT
+ that has an SSA_NAME lhs. */
+ gimple *last_use_stmt = USE_STMT (next_readonly_imm_use (&imm_iter));
+ if (gimple_code (last_use_stmt) != GIMPLE_ASSIGN
+ || TREE_CODE (gimple_assign_lhs (last_use_stmt)) != SSA_NAME
+ /* The lhs of the last use STMT should have a single use, and that
+ STMT should be in the same block as the current STMT. */
+ || !has_single_use (gimple_assign_lhs (last_use_stmt))
+ || gimple_bb (last_use_stmt) != gimple_bb (stmt))
+ return false;
+
+ r_op1 = *has_nop ? gimple_assign_lhs (last_use_stmt)
+ : PHI_RESULT (header_phi);
+ r_op2 = gimple_assign_lhs (stmt);
+ r_nop1 = *has_nop ? PHI_RESULT (header_phi) : NULL_TREE;
+ r_nop2 = *has_nop ? gimple_assign_lhs (last_use_stmt) : NULL_TREE;
+ reassociate_stmt = last_use_stmt;
+ tree_code reassociate_op = gimple_assign_rhs_code (reassociate_stmt);
+ if (reassociate_op != PLUS_EXPR
+ && reassociate_op != MINUS_EXPR
+ && reassociate_op != MULT_EXPR
+ && reassociate_op != BIT_IOR_EXPR
+ && reassociate_op != BIT_XOR_EXPR
+ && reassociate_op != BIT_AND_EXPR)
+ return false;
+ }
if (*has_nop)
{
@@ -1820,12 +1924,43 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
continue;
if (use_stmt == stmt)
continue;
+ if (use_stmt == reassociate_stmt)
+ continue;
if (gimple_code (use_stmt) != GIMPLE_PHI)
return false;
}
*op0 = r_op1; *op1 = r_op2;
*reduc = stmt;
+
+ if (reassociate_stmt)
+ {
+ /* Transform:
+
+ if (...)
+ tmp1 = reduc_1 + rhs1;
+ tmp2 = tmp1 + rhs2;
+ tmp3 = tmp2 + rhs3;
+
+ into:
+
+ tmp1 = rhs1; ---> We drop the reduc_1 operand
+ tmp2 = tmp1 + rhs2;
+ tmp3 = tmp2 + rhs3;
+ ...
+ reduc_3 = tmpN-1 + rhsN-1;
+ ifcvt = cond_expr ? reduc_3 : 0; */
+ gimple_stmt_iterator gsi = gsi_for_stmt (reassociate_stmt);
+ gimple *new_stmt;
+ if (gimple_assign_rhs1 (reassociate_stmt) == r_op1)
+ new_stmt = gimple_build_assign (gimple_assign_lhs (reassociate_stmt),
+ gimple_assign_rhs2 (reassociate_stmt));
+ else if (gimple_assign_rhs2 (reassociate_stmt) == r_op1)
+ new_stmt = gimple_build_assign (gimple_assign_lhs (reassociate_stmt),
+ gimple_assign_rhs1 (reassociate_stmt));
+
+ gsi_replace (&gsi, new_stmt, true);
+ }
return true;
}
@@ -1912,12 +2047,17 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
gsi_remove (&stmt_it, true);
release_defs (nop_reduc);
}
+
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
/* Delete original reduction stmt. */
- stmt_it = gsi_for_stmt (reduc);
- gsi_remove (&stmt_it, true);
- release_defs (reduc);
+ if (op1 != gimple_assign_lhs (reduc))
+ {
+ stmt_it = gsi_for_stmt (reduc);
+ gsi_remove (&stmt_it, true);
+ release_defs (reduc);
+ }
+
return rhs;
}