[1/19] middle-end ifcvt: Support bitfield lowering of multiple-exit loops
Checks
Commit Message
Hi,
With the patch enabling the vectorization of early-breaks, we'd like to allow
bitfield lowering in such loops, which requires the relaxation of allowing
multiple exits when doing so. In order to avoid a similar issue to PR107275,
the code that rejects loops with certain types of gimple_stmts was hoisted from
'if_convertible_loop_p_1' to 'get_loop_body_in_if_conv_order', to avoid trying
to lower bitfields in loops we are not going to vectorize anyway.
This also ensures 'ifcvt_local_dec' doesn't accidentally remove statements it
shouldn't as it will never come across them. I made sure to add a comment to
make clear that there is a direct connection between the two and if we were to
enable vectorization of any other gimple statement we should make sure both
handle it.
NOTE: This patch accepted before but never committed because it is a no-op
without the early break patch. This is a respun version of Andre's patch and
rebased to changes in ifcvt and updated to handle multiple exits.
Bootstrappend and regression tested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu and no issues.
gcc/ChangeLog:
* tree-if-conv.cc (if_convertible_loop_p_1): Move check from here ...
(get_loop_body_if_conv_order): ... to here.
(if_convertible_loop_p): Remove single_exit check.
(tree_if_conversion): Move single_exit check to if-conversion part and
support multiple exits.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-bitfield-read-1-not.c: New test.
* gcc.dg/vect/vect-bitfield-read-2-not.c: New test.
* gcc.dg/vect/vect-bitfield-read-8.c: New test.
* gcc.dg/vect/vect-bitfield-read-9.c: New test.
Co-Authored-By: Andre Vieira <andre.simoesdiasvieira@arm.com>
--- inline copy of patch --
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
new file mode 100644
index 0000000000000000000000000000000000000000..0d91067ebb27b1db2b2352975c43bce8b4171e3f
--
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
new file mode 100644
index 0000000000000000000000000000000000000000..0d91067ebb27b1db2b2352975c43bce8b4171e3f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_long_long } */
+/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define RES 56
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ switch (ptr[i].a)
+ {
+ case 0:
+ res += ptr[i].a + 1;
+ break;
+ case 1:
+ case 2:
+ case 3:
+ res += ptr[i].a;
+ break;
+ default:
+ return 0;
+ }
+ }
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c
new file mode 100644
index 0000000000000000000000000000000000000000..4ac7b3fc0dfd1c9d0b5e94a2ba6a745545577ec1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_long_long } */
+/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ asm volatile ("" ::: "memory");
+ res += ptr[i].a;
+ }
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c
new file mode 100644
index 0000000000000000000000000000000000000000..52cfd33d937ae90f3fe9556716c90e098b768ac8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s { int i : 31; };
+
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define ELT4 {4}
+#define N 32
+#define RES 25
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT4, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ if (ptr[i].i == 4)
+ return res;
+ res += ptr[i].i;
+ }
+
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Bitfield OK to lower." "ifcvt" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab814698131a5905def181eeed85d8a3c62b924b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_long_long } */
+/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFFFUL, 0}
+#define ELT1 {0x7FFFFFFFUL, 1}
+#define ELT2 {0x7FFFFFFFUL, 2}
+#define ELT3 {0x7FFFFFFFUL, 3}
+#define ELT4 {0x7FFFFFFFUL, 4}
+#define RES 9
+struct s A[N]
+ = { ELT0, ELT4, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ if (ptr[i].a)
+ return 9;
+ res += ptr[i].a;
+ }
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Bitfield OK to lower." "ifcvt" } } */
+
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index e342532a343a3c066142adeec5fdfaf736a653e5..cdb0fe4c29dfa531e3277925022d127b13ffcc16 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -586,7 +586,7 @@ add_to_dst_predicate_list (class loop *loop, edge e,
/* Return true if one of the successor edges of BB exits LOOP. */
static bool
-bb_with_exit_edge_p (class loop *loop, basic_block bb)
+bb_with_exit_edge_p (const class loop *loop, basic_block bb)
{
edge e;
edge_iterator ei;
@@ -1268,6 +1268,44 @@ get_loop_body_in_if_conv_order (const class loop *loop)
}
free (blocks_in_bfs_order);
BITMAP_FREE (visited);
+
+ /* Go through loop and reject if-conversion or lowering of bitfields if we
+ encounter statements we do not believe the vectorizer will be able to
+ handle. If adding a new type of statement here, make sure
+ 'ifcvt_local_dce' is also able to handle it propertly. */
+ for (index = 0; index < loop->num_nodes; index++)
+ {
+ basic_block bb = blocks[index];
+ gimple_stmt_iterator gsi;
+
+ bool may_have_nonlocal_labels
+ = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ switch (gimple_code (gsi_stmt (gsi)))
+ {
+ case GIMPLE_LABEL:
+ if (!may_have_nonlocal_labels)
+ {
+ tree label
+ = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
+ if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
+ {
+ free (blocks);
+ return NULL;
+ }
+ }
+ /* Fallthru. */
+ case GIMPLE_ASSIGN:
+ case GIMPLE_CALL:
+ case GIMPLE_DEBUG:
+ case GIMPLE_COND:
+ gimple_set_uid (gsi_stmt (gsi), 0);
+ break;
+ default:
+ free (blocks);
+ return NULL;
+ }
+ }
return blocks;
}
@@ -1438,36 +1476,6 @@ if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
exit_bb = bb;
}
- for (i = 0; i < loop->num_nodes; i++)
- {
- basic_block bb = ifc_bbs[i];
- gimple_stmt_iterator gsi;
-
- bool may_have_nonlocal_labels
- = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- switch (gimple_code (gsi_stmt (gsi)))
- {
- case GIMPLE_LABEL:
- if (!may_have_nonlocal_labels)
- {
- tree label
- = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
- if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
- return false;
- }
- /* Fallthru. */
- case GIMPLE_ASSIGN:
- case GIMPLE_CALL:
- case GIMPLE_DEBUG:
- case GIMPLE_COND:
- gimple_set_uid (gsi_stmt (gsi), 0);
- break;
- default:
- return false;
- }
- }
-
data_reference_p dr;
innermost_DR_map
@@ -1579,14 +1587,6 @@ if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
return false;
}
- /* More than one loop exit is too much to handle. */
- if (!single_exit (loop))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "multiple exits\n");
- return false;
- }
-
/* If one of the loop header's edge is an exit edge then do not
apply if-conversion. */
FOR_EACH_EDGE (e, ei, loop->header->succs)
@@ -3566,9 +3566,6 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
aggressive_if_conv = true;
}
- if (!single_exit (loop))
- goto cleanup;
-
/* If there are more than two BBs in the loop then there is at least one if
to convert. */
if (loop->num_nodes > 2
@@ -3588,15 +3585,25 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
if (loop->num_nodes > 2)
{
- need_to_ifcvt = true;
+ /* More than one loop exit is too much to handle. */
+ if (!single_exit (loop))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
+ }
+ else
+ {
+ need_to_ifcvt = true;
- if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
- goto cleanup;
+ if (!if_convertible_loop_p (loop, &refs)
+ || !dbg_cnt (if_conversion_tree))
+ goto cleanup;
- if ((need_to_predicate || any_complicated_phi)
- && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
- || loop->dont_vectorize))
- goto cleanup;
+ if ((need_to_predicate || any_complicated_phi)
+ && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
+ || loop->dont_vectorize))
+ goto cleanup;
+ }
}
if ((flag_tree_loop_vectorize || loop->force_vectorize)
@@ -3687,7 +3694,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
PHIs, those are to be kept in sync with the non-if-converted copy.
??? We'll still keep dead stores though. */
exit_bbs = BITMAP_ALLOC (NULL);
- bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
+ for (edge exit : get_loop_exit_edges (loop))
+ bitmap_set_bit (exit_bbs, exit->dest->index);
bitmap_set_bit (exit_bbs, loop->latch->index);
std::pair <tree, tree> *name_pair;
Comments
On Wed, 28 Jun 2023, Tamar Christina wrote:
> Hi,
>
> With the patch enabling the vectorization of early-breaks, we'd like to allow
> bitfield lowering in such loops, which requires the relaxation of allowing
> multiple exits when doing so. In order to avoid a similar issue to PR107275,
> the code that rejects loops with certain types of gimple_stmts was hoisted from
> 'if_convertible_loop_p_1' to 'get_loop_body_in_if_conv_order', to avoid trying
> to lower bitfields in loops we are not going to vectorize anyway.
>
> This also ensures 'ifcvt_local_dec' doesn't accidentally remove statements it
> shouldn't as it will never come across them. I made sure to add a comment to
> make clear that there is a direct connection between the two and if we were to
> enable vectorization of any other gimple statement we should make sure both
> handle it.
>
> NOTE: This patch accepted before but never committed because it is a no-op
> without the early break patch. This is a respun version of Andre's patch and
> rebased to changes in ifcvt and updated to handle multiple exits.
>
> Bootstrappend and regression tested on aarch64-none-linux-gnu and
> x86_64-pc-linux-gnu and no issues.
OK.
> gcc/ChangeLog:
>
> * tree-if-conv.cc (if_convertible_loop_p_1): Move check from here ...
> (get_loop_body_if_conv_order): ... to here.
> (if_convertible_loop_p): Remove single_exit check.
> (tree_if_conversion): Move single_exit check to if-conversion part and
> support multiple exits.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/vect-bitfield-read-1-not.c: New test.
> * gcc.dg/vect/vect-bitfield-read-2-not.c: New test.
> * gcc.dg/vect/vect-bitfield-read-8.c: New test.
> * gcc.dg/vect/vect-bitfield-read-9.c: New test.
>
> Co-Authored-By: Andre Vieira <andre.simoesdiasvieira@arm.com>
>
> --- inline copy of patch --
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..0d91067ebb27b1db2b2352975c43bce8b4171e3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c
> @@ -0,0 +1,60 @@
> +/* { dg-require-effective-target vect_shift } */
> +/* { dg-require-effective-target vect_long_long } */
> +/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +extern void abort(void);
> +
> +struct s {
> + char a : 4;
> +};
> +
> +#define N 32
> +#define ELT0 {0}
> +#define ELT1 {1}
> +#define ELT2 {2}
> +#define ELT3 {3}
> +#define RES 56
> +struct s A[N]
> + = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
> +
> +int __attribute__ ((noipa))
> +f(struct s *ptr, unsigned n) {
> + int res = 0;
> + for (int i = 0; i < n; ++i)
> + {
> + switch (ptr[i].a)
> + {
> + case 0:
> + res += ptr[i].a + 1;
> + break;
> + case 1:
> + case 2:
> + case 3:
> + res += ptr[i].a;
> + break;
> + default:
> + return 0;
> + }
> + }
> + return res;
> +}
> +
> +int main (void)
> +{
> + check_vect ();
> +
> + if (f(&A[0], N) != RES)
> + abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..4ac7b3fc0dfd1c9d0b5e94a2ba6a745545577ec1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2-not.c
> @@ -0,0 +1,49 @@
> +/* { dg-require-effective-target vect_shift } */
> +/* { dg-require-effective-target vect_long_long } */
> +/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +extern void abort(void);
> +
> +struct s {
> + char a : 4;
> +};
> +
> +#define N 32
> +#define ELT0 {0}
> +#define ELT1 {1}
> +#define ELT2 {2}
> +#define ELT3 {3}
> +#define RES 48
> +struct s A[N]
> + = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
> +
> +int __attribute__ ((noipa))
> +f(struct s *ptr, unsigned n) {
> + int res = 0;
> + for (int i = 0; i < n; ++i)
> + {
> + asm volatile ("" ::: "memory");
> + res += ptr[i].a;
> + }
> + return res;
> +}
> +
> +int main (void)
> +{
> + check_vect ();
> +
> + if (f(&A[0], N) != RES)
> + abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..52cfd33d937ae90f3fe9556716c90e098b768ac8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-8.c
> @@ -0,0 +1,49 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target vect_shift } */
> +/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +extern void abort(void);
> +
> +struct s { int i : 31; };
> +
> +#define ELT0 {0}
> +#define ELT1 {1}
> +#define ELT2 {2}
> +#define ELT3 {3}
> +#define ELT4 {4}
> +#define N 32
> +#define RES 25
> +struct s A[N]
> + = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT4, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
> +
> +int __attribute__ ((noipa))
> +f(struct s *ptr, unsigned n) {
> + int res = 0;
> + for (int i = 0; i < n; ++i)
> + {
> + if (ptr[i].i == 4)
> + return res;
> + res += ptr[i].i;
> + }
> +
> + return res;
> +}
> +
> +int main (void)
> +{
> + check_vect ();
> +
> + if (f(&A[0], N) != RES)
> + abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Bitfield OK to lower." "ifcvt" } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..ab814698131a5905def181eeed85d8a3c62b924b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-9.c
> @@ -0,0 +1,51 @@
> +/* { dg-require-effective-target vect_shift } */
> +/* { dg-require-effective-target vect_long_long } */
> +/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +extern void abort(void);
> +
> +struct s {
> + unsigned i : 31;
> + char a : 4;
> +};
> +
> +#define N 32
> +#define ELT0 {0x7FFFFFFFUL, 0}
> +#define ELT1 {0x7FFFFFFFUL, 1}
> +#define ELT2 {0x7FFFFFFFUL, 2}
> +#define ELT3 {0x7FFFFFFFUL, 3}
> +#define ELT4 {0x7FFFFFFFUL, 4}
> +#define RES 9
> +struct s A[N]
> + = { ELT0, ELT4, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
> + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
> +
> +int __attribute__ ((noipa))
> +f(struct s *ptr, unsigned n) {
> + int res = 0;
> + for (int i = 0; i < n; ++i)
> + {
> + if (ptr[i].a)
> + return 9;
> + res += ptr[i].a;
> + }
> + return res;
> +}
> +
> +int main (void)
> +{
> + check_vect ();
> +
> + if (f(&A[0], N) != RES)
> + abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Bitfield OK to lower." "ifcvt" } } */
> +
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index e342532a343a3c066142adeec5fdfaf736a653e5..cdb0fe4c29dfa531e3277925022d127b13ffcc16 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -586,7 +586,7 @@ add_to_dst_predicate_list (class loop *loop, edge e,
> /* Return true if one of the successor edges of BB exits LOOP. */
>
> static bool
> -bb_with_exit_edge_p (class loop *loop, basic_block bb)
> +bb_with_exit_edge_p (const class loop *loop, basic_block bb)
> {
> edge e;
> edge_iterator ei;
> @@ -1268,6 +1268,44 @@ get_loop_body_in_if_conv_order (const class loop *loop)
> }
> free (blocks_in_bfs_order);
> BITMAP_FREE (visited);
> +
> + /* Go through loop and reject if-conversion or lowering of bitfields if we
> + encounter statements we do not believe the vectorizer will be able to
> + handle. If adding a new type of statement here, make sure
> + 'ifcvt_local_dce' is also able to handle it propertly. */
> + for (index = 0; index < loop->num_nodes; index++)
> + {
> + basic_block bb = blocks[index];
> + gimple_stmt_iterator gsi;
> +
> + bool may_have_nonlocal_labels
> + = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
> + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> + switch (gimple_code (gsi_stmt (gsi)))
> + {
> + case GIMPLE_LABEL:
> + if (!may_have_nonlocal_labels)
> + {
> + tree label
> + = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
> + if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
> + {
> + free (blocks);
> + return NULL;
> + }
> + }
> + /* Fallthru. */
> + case GIMPLE_ASSIGN:
> + case GIMPLE_CALL:
> + case GIMPLE_DEBUG:
> + case GIMPLE_COND:
> + gimple_set_uid (gsi_stmt (gsi), 0);
> + break;
> + default:
> + free (blocks);
> + return NULL;
> + }
> + }
> return blocks;
> }
>
> @@ -1438,36 +1476,6 @@ if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
> exit_bb = bb;
> }
>
> - for (i = 0; i < loop->num_nodes; i++)
> - {
> - basic_block bb = ifc_bbs[i];
> - gimple_stmt_iterator gsi;
> -
> - bool may_have_nonlocal_labels
> - = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
> - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> - switch (gimple_code (gsi_stmt (gsi)))
> - {
> - case GIMPLE_LABEL:
> - if (!may_have_nonlocal_labels)
> - {
> - tree label
> - = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
> - if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
> - return false;
> - }
> - /* Fallthru. */
> - case GIMPLE_ASSIGN:
> - case GIMPLE_CALL:
> - case GIMPLE_DEBUG:
> - case GIMPLE_COND:
> - gimple_set_uid (gsi_stmt (gsi), 0);
> - break;
> - default:
> - return false;
> - }
> - }
> -
> data_reference_p dr;
>
> innermost_DR_map
> @@ -1579,14 +1587,6 @@ if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
> return false;
> }
>
> - /* More than one loop exit is too much to handle. */
> - if (!single_exit (loop))
> - {
> - if (dump_file && (dump_flags & TDF_DETAILS))
> - fprintf (dump_file, "multiple exits\n");
> - return false;
> - }
> -
> /* If one of the loop header's edge is an exit edge then do not
> apply if-conversion. */
> FOR_EACH_EDGE (e, ei, loop->header->succs)
> @@ -3566,9 +3566,6 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
> aggressive_if_conv = true;
> }
>
> - if (!single_exit (loop))
> - goto cleanup;
> -
> /* If there are more than two BBs in the loop then there is at least one if
> to convert. */
> if (loop->num_nodes > 2
> @@ -3588,15 +3585,25 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
>
> if (loop->num_nodes > 2)
> {
> - need_to_ifcvt = true;
> + /* More than one loop exit is too much to handle. */
> + if (!single_exit (loop))
> + {
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
> + }
> + else
> + {
> + need_to_ifcvt = true;
>
> - if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
> - goto cleanup;
> + if (!if_convertible_loop_p (loop, &refs)
> + || !dbg_cnt (if_conversion_tree))
> + goto cleanup;
>
> - if ((need_to_predicate || any_complicated_phi)
> - && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
> - || loop->dont_vectorize))
> - goto cleanup;
> + if ((need_to_predicate || any_complicated_phi)
> + && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
> + || loop->dont_vectorize))
> + goto cleanup;
> + }
> }
>
> if ((flag_tree_loop_vectorize || loop->force_vectorize)
> @@ -3687,7 +3694,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
> PHIs, those are to be kept in sync with the non-if-converted copy.
> ??? We'll still keep dead stores though. */
> exit_bbs = BITMAP_ALLOC (NULL);
> - bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
> + for (edge exit : get_loop_exit_edges (loop))
> + bitmap_set_bit (exit_bbs, exit->dest->index);
> bitmap_set_bit (exit_bbs, loop->latch->index);
>
> std::pair <tree, tree> *name_pair;
>
>
>
>
>
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_long_long } */
+/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define RES 56
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ switch (ptr[i].a)
+ {
+ case 0:
+ res += ptr[i].a + 1;
+ break;
+ case 1:
+ case 2:
+ case 3:
+ res += ptr[i].a;
+ break;
+ default:
+ return 0;
+ }
+ }
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
+
+
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_long_long } */
+/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ asm volatile ("" ::: "memory");
+ res += ptr[i].a;
+ }
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */
+
+
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s { int i : 31; };
+
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define ELT4 {4}
+#define N 32
+#define RES 25
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT4, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ if (ptr[i].i == 4)
+ return res;
+ res += ptr[i].i;
+ }
+
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Bitfield OK to lower." "ifcvt" } } */
+
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_long_long } */
+/* { dg-additional-options { "-fdump-tree-ifcvt-all" } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFFFUL, 0}
+#define ELT1 {0x7FFFFFFFUL, 1}
+#define ELT2 {0x7FFFFFFFUL, 2}
+#define ELT3 {0x7FFFFFFFUL, 3}
+#define ELT4 {0x7FFFFFFFUL, 4}
+#define RES 9
+struct s A[N]
+ = { ELT0, ELT4, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ if (ptr[i].a)
+ return 9;
+ res += ptr[i].a;
+ }
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Bitfield OK to lower." "ifcvt" } } */
+
@@ -586,7 +586,7 @@ add_to_dst_predicate_list (class loop *loop, edge e,
/* Return true if one of the successor edges of BB exits LOOP. */
static bool
-bb_with_exit_edge_p (class loop *loop, basic_block bb)
+bb_with_exit_edge_p (const class loop *loop, basic_block bb)
{
edge e;
edge_iterator ei;
@@ -1268,6 +1268,44 @@ get_loop_body_in_if_conv_order (const class loop *loop)
}
free (blocks_in_bfs_order);
BITMAP_FREE (visited);
+
+ /* Go through loop and reject if-conversion or lowering of bitfields if we
+ encounter statements we do not believe the vectorizer will be able to
+ handle. If adding a new type of statement here, make sure
+ 'ifcvt_local_dce' is also able to handle it propertly. */
+ for (index = 0; index < loop->num_nodes; index++)
+ {
+ basic_block bb = blocks[index];
+ gimple_stmt_iterator gsi;
+
+ bool may_have_nonlocal_labels
+ = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ switch (gimple_code (gsi_stmt (gsi)))
+ {
+ case GIMPLE_LABEL:
+ if (!may_have_nonlocal_labels)
+ {
+ tree label
+ = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
+ if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
+ {
+ free (blocks);
+ return NULL;
+ }
+ }
+ /* Fallthru. */
+ case GIMPLE_ASSIGN:
+ case GIMPLE_CALL:
+ case GIMPLE_DEBUG:
+ case GIMPLE_COND:
+ gimple_set_uid (gsi_stmt (gsi), 0);
+ break;
+ default:
+ free (blocks);
+ return NULL;
+ }
+ }
return blocks;
}
@@ -1438,36 +1476,6 @@ if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
exit_bb = bb;
}
- for (i = 0; i < loop->num_nodes; i++)
- {
- basic_block bb = ifc_bbs[i];
- gimple_stmt_iterator gsi;
-
- bool may_have_nonlocal_labels
- = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- switch (gimple_code (gsi_stmt (gsi)))
- {
- case GIMPLE_LABEL:
- if (!may_have_nonlocal_labels)
- {
- tree label
- = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
- if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
- return false;
- }
- /* Fallthru. */
- case GIMPLE_ASSIGN:
- case GIMPLE_CALL:
- case GIMPLE_DEBUG:
- case GIMPLE_COND:
- gimple_set_uid (gsi_stmt (gsi), 0);
- break;
- default:
- return false;
- }
- }
-
data_reference_p dr;
innermost_DR_map
@@ -1579,14 +1587,6 @@ if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
return false;
}
- /* More than one loop exit is too much to handle. */
- if (!single_exit (loop))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "multiple exits\n");
- return false;
- }
-
/* If one of the loop header's edge is an exit edge then do not
apply if-conversion. */
FOR_EACH_EDGE (e, ei, loop->header->succs)
@@ -3566,9 +3566,6 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
aggressive_if_conv = true;
}
- if (!single_exit (loop))
- goto cleanup;
-
/* If there are more than two BBs in the loop then there is at least one if
to convert. */
if (loop->num_nodes > 2
@@ -3588,15 +3585,25 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
if (loop->num_nodes > 2)
{
- need_to_ifcvt = true;
+ /* More than one loop exit is too much to handle. */
+ if (!single_exit (loop))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
+ }
+ else
+ {
+ need_to_ifcvt = true;
- if (!if_convertible_loop_p (loop, &refs) || !dbg_cnt (if_conversion_tree))
- goto cleanup;
+ if (!if_convertible_loop_p (loop, &refs)
+ || !dbg_cnt (if_conversion_tree))
+ goto cleanup;
- if ((need_to_predicate || any_complicated_phi)
- && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
- || loop->dont_vectorize))
- goto cleanup;
+ if ((need_to_predicate || any_complicated_phi)
+ && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
+ || loop->dont_vectorize))
+ goto cleanup;
+ }
}
if ((flag_tree_loop_vectorize || loop->force_vectorize)
@@ -3687,7 +3694,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
PHIs, those are to be kept in sync with the non-if-converted copy.
??? We'll still keep dead stores though. */
exit_bbs = BITMAP_ALLOC (NULL);
- bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
+ for (edge exit : get_loop_exit_edges (loop))
+ bitmap_set_bit (exit_bbs, exit->dest->index);
bitmap_set_bit (exit_bbs, loop->latch->index);
std::pair <tree, tree> *name_pair;