middle-end: check memory accesses in the destination block [PR113588].
Commit Message
Hi All,
When analyzing loads for an early break it was always the intention that for
the exit block that statements get moved to we only check the loads that can
be reached from the condition.
However the main loop checks all loads but skips the destination BB. As such
we never actually check the loads reachable from the COND in the last BB unless
that BB was also the exit chosen by the vectorizer.
This leads us to incorrectly vectorize the loop in the PR and, in doing so,
access memory out of bounds.
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/113588
* tree-vect-data-refs.cc (vect_analyze_early_break_dependences_1): New.
(vect_analyze_early_break_dependences): Use it.  Update comments.
gcc/testsuite/ChangeLog:
PR tree-optimization/113588
* gcc.dg/vect/vect-early-break_108-pr113588.c: New test.
* gcc.dg/vect/vect-early-break_109-pr113588.c: New test.
--- inline copy of patch --
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
new file mode 100644
index 0000000000000000000000000000000000000000..e488619c9aac41fafbcf479818392a6bb7c6924f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
+
+int foo (const char *s, unsigned long n)
+{
+ unsigned long len = 0;
+ while (*s++ && n--)
+ ++len;
+ return len;
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
new file mode 100644
index 0000000000000000000000000000000000000000..488c19d3ede809631d1a7ede0e7f7bcdc7a1ae43
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
@@ -0,0 +1,44 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target mmap } */
+
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "tree-vect.h"
+
+__attribute__((noipa))
+int foo (const char *s, unsigned long n)
+{
+ unsigned long len = 0;
+ while (*s++ && n--)
+ ++len;
+ return len;
+}
+
+int main()
+{
+
+ check_vect ();
+
+ long pgsz = sysconf (_SC_PAGESIZE);
+ void *p = mmap (NULL, pgsz * 3, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+ if (p == MAP_FAILED)
+ return 0;
+ mprotect (p, pgsz, PROT_NONE);
+ mprotect (p+2*pgsz, pgsz, PROT_NONE);
+ char *p1 = p + pgsz;
+ p1[0] = 1;
+ p1[1] = 0;
+ foo (p1, 1000);
+ p1 = p + 2*pgsz - 2;
+ p1[0] = 1;
+ p1[1] = 0;
+ foo (p1, 1000);
+ return 0;
+}
+
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index f592aeb8028afd4fd70e2175104efab2a2c0d82e..52cef242a7ce5d0e525bff639fa1dc2f0a6f30b9 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -619,10 +619,69 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
return opt_result::success ();
}
-/* Funcion vect_analyze_early_break_dependences.
+/* Function vect_analyze_early_break_dependences_1
- Examime all the data references in the loop and make sure that if we have
- mulitple exits that we are able to safely move stores such that they become
+ Helper function of vect_analyze_early_break_dependences which performs safety
+ analysis for load operations in an early break. */
+
+static opt_result
+vect_analyze_early_break_dependences_1 (data_reference *dr_ref, gimple *stmt)
+{
+ /* We currently only support statically allocated objects due to
+ not having first-faulting loads support or peeling for
+ alignment support. Compute the size of the referenced object
+ (it could be dynamically allocated). */
+ tree obj = DR_BASE_ADDRESS (dr_ref);
+ if (!obj || TREE_CODE (obj) != ADDR_EXPR)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks only supported on statically"
+ " allocated objects.\n");
+ return opt_result::failure_at (stmt,
+ "can't safely apply code motion to "
+ "dependencies of %G to vectorize "
+ "the early exit.\n", stmt);
+ }
+
+ tree refop = TREE_OPERAND (obj, 0);
+ tree refbase = get_base_address (refop);
+ if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
+ || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks only supported on"
+ " statically allocated objects.\n");
+ return opt_result::failure_at (stmt,
+ "can't safely apply code motion to "
+ "dependencies of %G to vectorize "
+ "the early exit.\n", stmt);
+ }
+
+ /* Check if vector accesses to the object will be within bounds.
+ must be a constant or assume loop will be versioned or niters
+ bounded by VF so accesses are within range. */
+ if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks not supported: vectorization "
+ "would %s beyond size of obj.",
+ DR_IS_READ (dr_ref) ? "read" : "write");
+ return opt_result::failure_at (stmt,
+ "can't safely apply code motion to "
+ "dependencies of %G to vectorize "
+ "the early exit.\n", stmt);
+ }
+
+ return opt_result::success ();
+}
+
+/* Function vect_analyze_early_break_dependences.
+
+ Examine all the data references in the loop and make sure that if we have
+ multiple exits that we are able to safely move stores such that they become
safe for vectorization. The function also calculates the place where to move
the instructions to and computes what the new vUSE chain should be.
@@ -639,7 +698,7 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
- Multiple loads are allowed as long as they don't alias.
NOTE:
- This implemementation is very conservative. Any overlappig loads/stores
+ This implementation is very conservative. Any overlapping loads/stores
that take place before the early break statement gets rejected aside from
WAR dependencies.
@@ -668,7 +727,6 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
auto_vec<data_reference *> bases;
basic_block dest_bb = NULL;
- hash_set <gimple *> visited;
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
class loop *loop_nest = loop_outer (loop);
@@ -683,6 +741,7 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
dest_bb = single_pred (loop->latch);
basic_block bb = dest_bb;
+ /* First analyse all blocks leading to dest_bb excluding dest_bb itself. */
do
{
/* If the destination block is also the header then we have nothing to do. */
@@ -707,53 +766,11 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
if (!dr_ref)
continue;
- /* We currently only support statically allocated objects due to
- not having first-faulting loads support or peeling for
- alignment support. Compute the size of the referenced object
- (it could be dynamically allocated). */
- tree obj = DR_BASE_ADDRESS (dr_ref);
- if (!obj || TREE_CODE (obj) != ADDR_EXPR)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "early breaks only supported on statically"
- " allocated objects.\n");
- return opt_result::failure_at (stmt,
- "can't safely apply code motion to "
- "dependencies of %G to vectorize "
- "the early exit.\n", stmt);
- }
-
- tree refop = TREE_OPERAND (obj, 0);
- tree refbase = get_base_address (refop);
- if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
- || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "early breaks only supported on"
- " statically allocated objects.\n");
- return opt_result::failure_at (stmt,
- "can't safely apply code motion to "
- "dependencies of %G to vectorize "
- "the early exit.\n", stmt);
- }
-
- /* Check if vector accesses to the object will be within bounds.
- must be a constant or assume loop will be versioned or niters
- bounded by VF so accesses are within range. */
- if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "early breaks not supported: vectorization "
- "would %s beyond size of obj.",
- DR_IS_READ (dr_ref) ? "read" : "write");
- return opt_result::failure_at (stmt,
- "can't safely apply code motion to "
- "dependencies of %G to vectorize "
- "the early exit.\n", stmt);
- }
+ /* Check if the operation is one we can safely do. */
+ opt_result res
+ = vect_analyze_early_break_dependences_1 (dr_ref, stmt);
+ if (!res)
+ return res;
if (DR_IS_READ (dr_ref))
bases.safe_push (dr_ref);
@@ -817,6 +834,51 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
}
while (bb != loop->header);
+ /* For the destination BB we need to only analyze loads reachable from the early
+ break statement itself. */
+ auto_vec <tree> workset;
+ hash_set <tree> visited;
+ gimple *last_stmt = gsi_stmt (gsi_last_bb (dest_bb));
+ gcond *last_cond = dyn_cast <gcond *> (last_stmt);
+ /* If the cast fails we have a different control flow statement in the latch. Most
+ commonly this is a switch. */
+ if (!last_cond)
+ return opt_result::failure_at (last_stmt,
+ "can't safely apply code motion to dependencies"
+ " to vectorize the early exit, unknown control fow"
+ " in stmt %G", last_stmt);
+ workset.safe_push (gimple_cond_lhs (last_cond));
+ workset.safe_push (gimple_cond_rhs (last_cond));
+
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ tree lhs;
+ do
+ {
+ tree op = workset.pop ();
+ if (visited.add (op))
+ continue;
+ stmt_vec_info stmt_vinfo = loop_vinfo->lookup_def (op);
+
+ /* Not defined in loop, don't care. */
+ if (!stmt_vinfo)
+ continue;
+ gimple *stmt = STMT_VINFO_STMT (stmt_vinfo);
+ auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
+ if (dr_ref)
+ {
+ opt_result res
+ = vect_analyze_early_break_dependences_1 (dr_ref, stmt);
+ if (!res)
+ return res;
+ }
+ else
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
+ if ((lhs = gimple_get_lhs (USE_STMT (use_p))))
+ workset.safe_push (lhs);
+ }
+ while (!workset.is_empty ());
+
/* We don't allow outer -> inner loop transitions which should have been
trapped already during loop form analysis. */
gcc_assert (dest_bb->loop_father == loop);
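In other words, the intended destination-block analysis is a backwards walk
over SSA def chains, starting from the operands of the final condition and
running the safety checks on every load it reaches. The posted patch
implements this with a worklist over trees, loop_vinfo->lookup_def and
immediate-use iterators; what follows is only a minimal standalone sketch of
that shape, with illustrative stand-in types and a hypothetical access_ok
callback rather than GCC internals:

  #include <string>
  #include <unordered_map>
  #include <unordered_set>
  #include <vector>

  /* Illustrative stand-ins for GIMPLE statements and SSA names.  */
  struct stmt
  {
    std::vector<std::string> ops;  /* SSA names this statement reads.  */
    bool has_data_ref;             /* Statement contains a memory access.  */
  };

  /* DEFS maps an SSA name to its defining statement inside the loop; names
     defined outside the loop are simply absent.  ACCESS_OK stands in for the
     per-reference safety checks (vect_analyze_early_break_dependences_1 in
     the patch).  */
  static bool
  cond_reachable_loads_ok (const std::vector<std::string> &cond_ops,
                           const std::unordered_map<std::string, stmt> &defs,
                           bool (*access_ok) (const stmt &))
  {
    std::vector<std::string> workset (cond_ops.begin (), cond_ops.end ());
    std::unordered_set<std::string> visited;
    while (!workset.empty ())
      {
        std::string op = workset.back ();
        workset.pop_back ();
        if (!visited.insert (op).second)
          continue;                  /* Already analyzed.  */
        auto it = defs.find (op);
        if (it == defs.end ())
          continue;                  /* Not defined in loop, don't care.  */
        const stmt &s = it->second;
        if (s.has_data_ref && !access_ok (s))
          return false;              /* Unsafe access reachable from COND.  */
        for (const std::string &use : s.ops)
          workset.push_back (use);   /* Keep walking the def chain.  */
      }
    return true;
  }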
Comments
On Mon, 29 Jan 2024, Tamar Christina wrote:
> Hi All,
>
> When analyzing loads for early break it was always the intention that
> for the exit where things get moved to we only check the loads that can
> be reached from the condition.
Looking at the code I'm a bit confused that we always move to
single_pred (loop->latch) - IIRC that was different at some point?
Shouldn't we move stores after the last early exit condition instead?
In particular for the peeled case single_pred (loop->latch) is the
block with the actual early exit condition? So for that case we'd
need to move to the latch itself instead? For non-peeled we move
to the block with the IV condition which looks OK.
> However the main loop checks all loads and we skip the destination BB.
> As such we never actually check the loads reachable from the COND in the
> last BB unless this BB was also the exit chosen by the vectorizer.
>
> This leads us to incorrectly vectorize the loop in the PR and in doing so access
> out of bounds.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
The patch ends up with a worklist and another confusing comment
+  /* For the destination BB we need to only analyze loads reachable from the
+     early break statement itself.  */
But I think it's a downstream issue from the issue above. That said,
even for the non-peeled case we need to check ref_within_array_bound,
no?
So what about re-doing that initial loop like the following instead
(and also fix dest_bb, but I'd like clarification here). Basically
walk all blocks, do the ref_within_array_bound first and only
after we've seen 'dest_bb' do the checks required for moving
stores for all upstream BBs.
And dest_bb should be
/* Move side-effects to the in-loop destination of the last early
exit. */
if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
dest_bb = loop->latch;
else
dest_bb = single_pred (loop->latch);
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index f592aeb8028..d6c8910dd6c 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -668,7 +668,6 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
auto_vec<data_reference *> bases;
basic_block dest_bb = NULL;
- hash_set <gimple *> visited;
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
class loop *loop_nest = loop_outer (loop);
@@ -681,15 +680,11 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
side-effects to is always the latch connected exit. When we support
general control flow we can do better but for now this is fine. */
dest_bb = single_pred (loop->latch);
- basic_block bb = dest_bb;
+ basic_block bb = loop->latch;
+ bool check_deps = false;
do
{
- /* If the destination block is also the header then we have nothing to do. */
- if (!single_pred_p (bb))
- continue;
-
- bb = single_pred (bb);
gimple_stmt_iterator gsi = gsi_last_bb (bb);
/* Now analyze all the remaining statements and try to determine which
@@ -707,6 +702,25 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
if (!dr_ref)
continue;
+ /* Check if vector accesses to the object will be within bounds.
+ must be a constant or assume loop will be versioned or niters
+ bounded by VF so accesses are within range. */
+ if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks not supported: vectorization "
+ "would %s beyond size of obj.",
+ DR_IS_READ (dr_ref) ? "read" : "write");
+ return opt_result::failure_at (stmt,
+ "can't safely apply code motion to "
+ "dependencies of %G to vectorize "
+ "the early exit.\n", stmt);
+ }
+
+ if (!check_deps)
+ continue;
+
/* We currently only support statically allocated objects due to
not having first-faulting loads support or peeling for
alignment support. Compute the size of the referenced object
@@ -739,22 +753,6 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
"the early exit.\n", stmt);
}
- /* Check if vector accesses to the object will be within bounds.
- must be a constant or assume loop will be versioned or niters
- bounded by VF so accesses are within range. */
- if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "early breaks not supported: vectorization "
- "would %s beyond size of obj.",
- DR_IS_READ (dr_ref) ? "read" : "write");
- return opt_result::failure_at (stmt,
- "can't safely apply code motion to "
- "dependencies of %G to vectorize "
- "the early exit.\n", stmt);
- }
-
if (DR_IS_READ (dr_ref))
bases.safe_push (dr_ref);
else if (DR_IS_WRITE (dr_ref))
@@ -814,8 +812,16 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
"marked statement for vUSE update: %G", stmt);
}
}
+ if (!single_pred_p (bb))
+ {
+ gcc_assert (bb == loop->header);
+ break;
+ }
+ if (bb == dest_bb)
+ check_deps = true;
+ bb = single_pred (bb);
}
- while (bb != loop->header);
+ while (1);
/* We don't allow outer -> inner loop transitions which should have been
trapped already during loop form analysis. */
> [... rest of quoted message (changelog and patch, as above) elided ...]
> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Tuesday, January 30, 2024 9:51 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; jlaw@ventanamicro.com
> Subject: Re: [PATCH]middle-end: check memory accesses in the destination block
> [PR113588].
>
> On Mon, 29 Jan 2024, Tamar Christina wrote:
>
> > Hi All,
> >
> > When analyzing loads for early break it was always the intention that
> > for the exit where things get moved to we only check the loads that can
> > be reached from the condition.
>
> Looking at the code I'm a bit confused that we always move to
> single_pred (loop->latch) - IIRC that was different at some point?
>
> Shouldn't we move stores after the last early exit condition instead?
Yes, it was changed during another PR fix. The rationale at that time didn't take into account
the peeled case. It used to be that we would "search" for the exit to place it in.

At that time the rationale was: well, it doesn't make sense, it has to go in the block that is the
last to be executed. With the non-peeled case it's always the one before the latch.

Or put differently, I think the destination should be the main IV block. I am not quite sure
I'm following why you want to put the peeled cases inside the latch block.

Ah, is it because the latch block is always going to only be executed when you make a full iteration?
That makes sense, but then I think we should also analyze the stores in all blocks (which your change
maybe already does, let me check) since if we're also lifting past the final block we need to update
the vuses there too.

If the above is correct then I think I understand what you're saying and will update the patch and do
some checks.
Thanks,
Tamar
> [... rest of quoted message elided ...]
On Tue, 30 Jan 2024, Tamar Christina wrote:
> [... quoted context elided ...]
> If the above is correct then I think I understand what you're saying and
> will update the patch and do some checks.
Yes, I think that's what I wanted to say.
Richard.
> Thanks,
> Tamar
>
> >
> > In particular for the peeled case single_pred (loop->latch) is the
> > block with the actual early exit condition? So for that case we'd
> > need to move to the latch itself instead? For non-peeled we move
> > to the block with the IV condition which looks OK.
> >
> > > However the main loop checks all loads and we skip the destination BB.
> > > As such we never actually check the loads reachable from the COND in the
> > > last BB unless this BB was also the exit chosen by the vectorizer.
> > >
> > > This leads us to incorrectly vectorize the loop in the PR and in doing so access
> > > out of bounds.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > >
> > > Ok for master?
> >
> > The patch ends up with a worklist and another confusing comment
> >
> > + /* For the destination BB we need to only analyze loads reachable from
> > the early
> > + break statement itself. */
> >
> > But I think it's a downstream issue from the issue above. That said,
> > even for the non-peeled case we need to check ref_within_array_bound,
> > no?
> >
> > So what about re-doing that initial loop like the following instead
> > (and also fix dest_bb, but I'd like clarification here). Basically
> > walk all blocks, do the ref_within_array_bound first and only
> > after we've seen 'dest_bb' do the checks required for moving
> > stores for all upstream BBs.
> >
> > And dest_bb should be
> >
> > /* Move side-effects to the in-loop destination of the last early
> > exit. */
> > if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> > dest_bb = loop->latch;
> > else
> > dest_bb = single_pred (loop->latch);
> >
> >
> > diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> > index f592aeb8028..d6c8910dd6c 100644
> > --- a/gcc/tree-vect-data-refs.cc
> > +++ b/gcc/tree-vect-data-refs.cc
> > @@ -668,7 +668,6 @@ vect_analyze_early_break_dependences (loop_vec_info
> > loop_vinfo)
> > auto_vec<data_reference *> bases;
> > basic_block dest_bb = NULL;
> >
> > - hash_set <gimple *> visited;
> > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> > class loop *loop_nest = loop_outer (loop);
> >
> > @@ -681,15 +680,11 @@ vect_analyze_early_break_dependences
> > (loop_vec_info loop_vinfo)
> > side-effects to is always the latch connected exit. When we support
> > general control flow we can do better but for now this is fine. */
> > dest_bb = single_pred (loop->latch);
> > - basic_block bb = dest_bb;
> > + basic_block bb = loop->latch;
> > + bool check_deps = false;
> >
> > do
> > {
> > - /* If the destination block is also the header then we have nothing to do. */
> > - if (!single_pred_p (bb))
> > - continue;
> > -
> > - bb = single_pred (bb);
> > gimple_stmt_iterator gsi = gsi_last_bb (bb);
> >
> > /* Now analyze all the remaining statements and try to determine which
> > @@ -707,6 +702,25 @@ vect_analyze_early_break_dependences (loop_vec_info
> > loop_vinfo)
> > if (!dr_ref)
> > continue;
> >
> > + /* Check if vector accesses to the object will be within bounds.
> > + must be a constant or assume loop will be versioned or niters
> > + bounded by VF so accesses are within range. */
> > + if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
> > + {
> > + if (dump_enabled_p ())
> > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > + "early breaks not supported: vectorization "
> > + "would %s beyond size of obj.",
> > + DR_IS_READ (dr_ref) ? "read" : "write");
> > + return opt_result::failure_at (stmt,
> > + "can't safely apply code motion to "
> > + "dependencies of %G to vectorize "
> > + "the early exit.\n", stmt);
> > + }
> > +
> > + if (!check_deps)
> > + continue;
> > +
> > /* We currently only support statically allocated objects due to
> > not having first-faulting loads support or peeling for
> > alignment support. Compute the size of the referenced object
> > @@ -739,22 +753,6 @@ vect_analyze_early_break_dependences (loop_vec_info
> > loop_vinfo)
> > "the early exit.\n", stmt);
> > }
> >
> > - /* Check if vector accesses to the object will be within bounds.
> > - must be a constant or assume loop will be versioned or niters
> > - bounded by VF so accesses are within range. */
> > - if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
> > - {
> > - if (dump_enabled_p ())
> > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > - "early breaks not supported: vectorization "
> > - "would %s beyond size of obj.",
> > - DR_IS_READ (dr_ref) ? "read" : "write");
> > - return opt_result::failure_at (stmt,
> > - "can't safely apply code motion to "
> > - "dependencies of %G to vectorize "
> > - "the early exit.\n", stmt);
> > - }
> > -
> > if (DR_IS_READ (dr_ref))
> > bases.safe_push (dr_ref);
> > else if (DR_IS_WRITE (dr_ref))
> > @@ -814,8 +812,16 @@ vect_analyze_early_break_dependences (loop_vec_info
> > loop_vinfo)
> > "marked statement for vUSE update: %G", stmt);
> > }
> > }
> > + if (!single_pred_p (bb))
> > + {
> > + gcc_assert (bb == loop->header);
> > + break;
> > + }
> > + if (bb == dest_bb)
> > + check_deps = true;
> > + bb = single_pred (bb);
> > }
> > - while (bb != loop->header);
> > + while (1);
> >
> > /* We don't allow outer -> inner loop transitions which should have been
> > trapped already during loop form analysis. */
> >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > > PR tree-optimization/113588
> > > * tree-vect-data-refs.cc (vect_analyze_early_break_dependences_1): New.
> > > (vect_analyze_data_ref_dependence): Use it.
> > > (vect_analyze_early_break_dependences): Update comments.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > PR tree-optimization/113588
> > > * gcc.dg/vect/vect-early-break_108-pr113588.c: New test.
> > > * gcc.dg/vect/vect-early-break_109-pr113588.c: New test.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
> > b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
> > > new file mode 100644
> > > index
> > 0000000000000000000000000000000000000000..e488619c9aac41fafbcf479
> > 818392a6bb7c6924f
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-add-options vect_early_break } */
> > > +/* { dg-require-effective-target vect_early_break } */
> > > +/* { dg-require-effective-target vect_int } */
> > > +
> > > +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> > > +
> > > +int foo (const char *s, unsigned long n)
> > > +{
> > > + unsigned long len = 0;
> > > + while (*s++ && n--)
> > > + ++len;
> > > + return len;
> > > +}
> > > +
> > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
> > b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
> > > new file mode 100644
> > > index
> > 0000000000000000000000000000000000000000..488c19d3ede809631d1a7
> > ede0e7f7bcdc7a1ae43
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
> > > @@ -0,0 +1,44 @@
> > > +/* { dg-add-options vect_early_break } */
> > > +/* { dg-require-effective-target vect_early_break } */
> > > +/* { dg-require-effective-target vect_int } */
> > > +/* { dg-require-effective-target mmap } */
> > > +
> > > +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> > > +
> > > +#include <sys/mman.h>
> > > +#include <unistd.h>
> > > +
> > > +#include "tree-vect.h"
> > > +
> > > +__attribute__((noipa))
> > > +int foo (const char *s, unsigned long n)
> > > +{
> > > + unsigned long len = 0;
> > > + while (*s++ && n--)
> > > + ++len;
> > > + return len;
> > > +}
> > > +
> > > +int main()
> > > +{
> > > +
> > > + check_vect ();
> > > +
> > > + long pgsz = sysconf (_SC_PAGESIZE);
> > > + void *p = mmap (NULL, pgsz * 3, PROT_READ|PROT_WRITE,
> > > + MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
> > > + if (p == MAP_FAILED)
> > > + return 0;
> > > + mprotect (p, pgsz, PROT_NONE);
> > > + mprotect (p+2*pgsz, pgsz, PROT_NONE);
> > > + char *p1 = p + pgsz;
> > > + p1[0] = 1;
> > > + p1[1] = 0;
> > > + foo (p1, 1000);
> > > + p1 = p + 2*pgsz - 2;
> > > + p1[0] = 1;
> > > + p1[1] = 0;
> > > + foo (p1, 1000);
> > > + return 0;
> > > +}
> > > +
> > > diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> > > index
> > f592aeb8028afd4fd70e2175104efab2a2c0d82e..52cef242a7ce5d0e525bff639fa
> > 1dc2f0a6f30b9 100644
> > > --- a/gcc/tree-vect-data-refs.cc
> > > +++ b/gcc/tree-vect-data-refs.cc
> > > @@ -619,10 +619,69 @@ vect_analyze_data_ref_dependence (struct
> > data_dependence_relation *ddr,
> > > return opt_result::success ();
> > > }
> > >
> > > -/* Funcion vect_analyze_early_break_dependences.
> > > +/* Function vect_analyze_early_break_dependences_1
> > >
> > > - Examime all the data references in the loop and make sure that if we have
> > > - mulitple exits that we are able to safely move stores such that they become
> > > + Helper function of vect_analyze_early_break_dependences which performs
> > safety
> > > + analysis for load operations in an early break. */
> > > +
> > > +static opt_result
> > > +vect_analyze_early_break_dependences_1 (data_reference *dr_ref, gimple
> > *stmt)
> > > +{
> > > + /* We currently only support statically allocated objects due to
> > > + not having first-faulting loads support or peeling for
> > > + alignment support. Compute the size of the referenced object
> > > + (it could be dynamically allocated). */
> > > + tree obj = DR_BASE_ADDRESS (dr_ref);
> > > + if (!obj || TREE_CODE (obj) != ADDR_EXPR)
> > > + {
> > > + if (dump_enabled_p ())
> > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > + "early breaks only supported on statically"
> > > + " allocated objects.\n");
> > > + return opt_result::failure_at (stmt,
> > > + "can't safely apply code motion to "
> > > + "dependencies of %G to vectorize "
> > > + "the early exit.\n", stmt);
> > > + }
> > > +
> > > + tree refop = TREE_OPERAND (obj, 0);
> > > + tree refbase = get_base_address (refop);
> > > + if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
> > > + || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
> > > + {
> > > + if (dump_enabled_p ())
> > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > + "early breaks only supported on"
> > > + " statically allocated objects.\n");
> > > + return opt_result::failure_at (stmt,
> > > + "can't safely apply code motion to "
> > > + "dependencies of %G to vectorize "
> > > + "the early exit.\n", stmt);
> > > + }
> > > +
> > > + /* Check if vector accesses to the object will be within bounds.
> > > + must be a constant or assume loop will be versioned or niters
> > > + bounded by VF so accesses are within range. */
> > > + if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
> > > + {
> > > + if (dump_enabled_p ())
> > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > + "early breaks not supported: vectorization "
> > > + "would %s beyond size of obj.",
> > > + DR_IS_READ (dr_ref) ? "read" : "write");
> > > + return opt_result::failure_at (stmt,
> > > + "can't safely apply code motion to "
> > > + "dependencies of %G to vectorize "
> > > + "the early exit.\n", stmt);
> > > + }
> > > +
> > > + return opt_result::success ();
> > > +}
> > > +
> > > +/* Function vect_analyze_early_break_dependences.
> > > +
> > > + Examine all the data references in the loop and make sure that if we have
> > > + multiple exits that we are able to safely move stores such that they become
> > > safe for vectorization. The function also calculates the place where to move
> > > the instructions to and computes what the new vUSE chain should be.
> > >
> > > @@ -639,7 +698,7 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
> > > - Multiple loads are allowed as long as they don't alias.
> > >
> > > NOTE:
> > > - This implemementation is very conservative. Any overlappig loads/stores
> > > + This implementation is very conservative. Any overlapping loads/stores
> > > that take place before the early break statement gets rejected aside from
> > > WAR dependencies.
> > >
> > > @@ -668,7 +727,6 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
> > > auto_vec<data_reference *> bases;
> > > basic_block dest_bb = NULL;
> > >
> > > - hash_set <gimple *> visited;
> > > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> > > class loop *loop_nest = loop_outer (loop);
> > >
> > > @@ -683,6 +741,7 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
> > > dest_bb = single_pred (loop->latch);
> > > basic_block bb = dest_bb;
> > >
> > > + /* First analyse all blocks leading to dest_bb excluding dest_bb itself. */
> > > do
> > > {
> > > /* If the destination block is also the header then we have nothing to do. */
> > > @@ -707,53 +766,11 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
> > > if (!dr_ref)
> > > continue;
> > >
> > > - /* We currently only support statically allocated objects due to
> > > - not having first-faulting loads support or peeling for
> > > - alignment support. Compute the size of the referenced object
> > > - (it could be dynamically allocated). */
> > > - tree obj = DR_BASE_ADDRESS (dr_ref);
> > > - if (!obj || TREE_CODE (obj) != ADDR_EXPR)
> > > - {
> > > - if (dump_enabled_p ())
> > > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > - "early breaks only supported on statically"
> > > - " allocated objects.\n");
> > > - return opt_result::failure_at (stmt,
> > > - "can't safely apply code motion to "
> > > - "dependencies of %G to vectorize "
> > > - "the early exit.\n", stmt);
> > > - }
> > > -
> > > - tree refop = TREE_OPERAND (obj, 0);
> > > - tree refbase = get_base_address (refop);
> > > - if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
> > > - || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
> > > - {
> > > - if (dump_enabled_p ())
> > > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > - "early breaks only supported on"
> > > - " statically allocated objects.\n");
> > > - return opt_result::failure_at (stmt,
> > > - "can't safely apply code motion to "
> > > - "dependencies of %G to vectorize "
> > > - "the early exit.\n", stmt);
> > > - }
> > > -
> > > - /* Check if vector accesses to the object will be within bounds.
> > > - must be a constant or assume loop will be versioned or niters
> > > - bounded by VF so accesses are within range. */
> > > - if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
> > > - {
> > > - if (dump_enabled_p ())
> > > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > - "early breaks not supported: vectorization "
> > > - "would %s beyond size of obj.",
> > > - DR_IS_READ (dr_ref) ? "read" : "write");
> > > - return opt_result::failure_at (stmt,
> > > - "can't safely apply code motion to "
> > > - "dependencies of %G to vectorize "
> > > - "the early exit.\n", stmt);
> > > - }
> > > + /* Check if the operation is one we can safely do. */
> > > + opt_result res
> > > + = vect_analyze_early_break_dependences_1 (dr_ref, stmt);
> > > + if (!res)
> > > + return res;
> > >
> > > if (DR_IS_READ (dr_ref))
> > > bases.safe_push (dr_ref);
> > > @@ -817,6 +834,51 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
> > > }
> > > while (bb != loop->header);
> > >
> > > + /* For the destination BB we need to only analyze loads reachable from the early
> > > + break statement itself. */
> > > + auto_vec <tree> workset;
> > > + hash_set <tree> visited;
> > > + gimple *last_stmt = gsi_stmt (gsi_last_bb (dest_bb));
> > > + gcond *last_cond = dyn_cast <gcond *> (last_stmt);
> > > + /* If the cast fails we have a different control flow statement in the latch. Most
> > > + commonly this is a switch. */
> > > + if (!last_cond)
> > > + return opt_result::failure_at (last_stmt,
> > > + "can't safely apply code motion to dependencies"
> > > + " to vectorize the early exit, unknown control fow"
> > > + " in stmt %G", last_stmt);
> > > + workset.safe_push (gimple_cond_lhs (last_cond));
> > > + workset.safe_push (gimple_cond_rhs (last_cond));
> > > +
> > > + imm_use_iterator imm_iter;
> > > + use_operand_p use_p;
> > > + tree lhs;
> > > + do
> > > + {
> > > + tree op = workset.pop ();
> > > + if (visited.add (op))
> > > + continue;
> > > + stmt_vec_info stmt_vinfo = loop_vinfo->lookup_def (op);
> > > +
> > > + /* Not defined in loop, don't care. */
> > > + if (!stmt_vinfo)
> > > + continue;
> > > + gimple *stmt = STMT_VINFO_STMT (stmt_vinfo);
> > > + auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
> > > + if (dr_ref)
> > > + {
> > > + opt_result res
> > > + = vect_analyze_early_break_dependences_1 (dr_ref, stmt);
> > > + if (!res)
> > > + return res;
> > > + }
> > > + else
> > > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
> > > + if ((lhs = gimple_get_lhs (USE_STMT (use_p))))
> > > + workset.safe_push (lhs);
> > > + }
> > > + while (!workset.is_empty ());
> > > +
> > > /* We don't allow outer -> inner loop transitions which should have been
> > > trapped already during loop form analysis. */
> > > gcc_assert (dest_bb->loop_father == loop);
> > >
> > >
> > >
> > >
> > >
> >
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
>
> >
> > If the above is correct then I think I understand what you're saying and
> > will update the patch and do some Checks.
>
> Yes, I think that's what I wanted to say.
>
As discussed:
Bootstrapped Regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu no issues.
Also checked both with --enable-lto --with-build-config='bootstrap-O3 bootstrap-lto' --enable-multilib
and --enable-lto --with-build-config=bootstrap-O3 --enable-checking=release,yes,rtl,extra;
and checked the libcrypt testsuite as reported on PR113467.
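A rough sketch of the second of those configurations, with placeholder source path and job count:

  $GCC_SRC/configure --enable-lto --with-build-config=bootstrap-O3 \
      --enable-checking=release,yes,rtl,extra
  make -j$(nproc) && make -k check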
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/113588
PR tree-optimization/113467
* tree-vect-data-refs.cc (vect_analyze_early_break_dependences):
Choose correct dest and fix checks.  Update comments.
gcc/testsuite/ChangeLog:
PR tree-optimization/113588
PR tree-optimization/113467
* gcc.dg/vect/vect-early-break_108-pr113588.c: New test.
* gcc.dg/vect/vect-early-break_109-pr113588.c: New test.
--- inline copy of patch ---
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
new file mode 100644
index 0000000000000000000000000000000000000000..e488619c9aac41fafbcf479818392a6bb7c6924f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
+
+int foo (const char *s, unsigned long n)
+{
+ unsigned long len = 0;
+ while (*s++ && n--)
+ ++len;
+ return len;
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
new file mode 100644
index 0000000000000000000000000000000000000000..488c19d3ede809631d1a7ede0e7f7bcdc7a1ae43
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
@@ -0,0 +1,44 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target mmap } */
+
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "tree-vect.h"
+
+__attribute__((noipa))
+int foo (const char *s, unsigned long n)
+{
+ unsigned long len = 0;
+ while (*s++ && n--)
+ ++len;
+ return len;
+}
+
+int main()
+{
+
+ check_vect ();
+
+ long pgsz = sysconf (_SC_PAGESIZE);
+ void *p = mmap (NULL, pgsz * 3, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+ if (p == MAP_FAILED)
+ return 0;
+ mprotect (p, pgsz, PROT_NONE);
+ mprotect (p+2*pgsz, pgsz, PROT_NONE);
+ char *p1 = p + pgsz;
+ p1[0] = 1;
+ p1[1] = 0;
+ foo (p1, 1000);
+ p1 = p + 2*pgsz - 2;
+ p1[0] = 1;
+ p1[1] = 0;
+ foo (p1, 1000);
+ return 0;
+}
+
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index f592aeb8028afd4fd70e2175104efab2a2c0d82e..53fdfc25d7dc2deb7788176252697d2e455555fc 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -619,10 +619,10 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
return opt_result::success ();
}
-/* Funcion vect_analyze_early_break_dependences.
+/* Function vect_analyze_early_break_dependences.
- Examime all the data references in the loop and make sure that if we have
- mulitple exits that we are able to safely move stores such that they become
+ Examine all the data references in the loop and make sure that if we have
+ multiple exits that we are able to safely move stores such that they become
safe for vectorization. The function also calculates the place where to move
the instructions to and computes what the new vUSE chain should be.
@@ -639,7 +639,7 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
- Multiple loads are allowed as long as they don't alias.
NOTE:
- This implemementation is very conservative. Any overlappig loads/stores
+ This implementation is very conservative. Any overlapping loads/stores
that take place before the early break statement gets rejected aside from
WAR dependencies.
@@ -668,7 +668,6 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
auto_vec<data_reference *> bases;
basic_block dest_bb = NULL;
- hash_set <gimple *> visited;
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
class loop *loop_nest = loop_outer (loop);
@@ -677,19 +676,33 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
"loop contains multiple exits, analyzing"
" statement dependencies.\n");
+ if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "alternate exit has been chosen as main exit.\n");
+
/* Since we don't support general control flow, the location we'll move the
side-effects to is always the latch connected exit. When we support
- general control flow we can do better but for now this is fine. */
- dest_bb = single_pred (loop->latch);
+ general control flow we can do better but for now this is fine. Move
+ side-effects to the in-loop destination of the last early exit. For the PEELED
+ case we move the side-effects to the latch block as this is guaranteed to be the
+ last block to be executed when a vector iteration finishes. */
+ if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
+ dest_bb = loop->latch;
+ else
+ dest_bb = single_pred (loop->latch);
+
+ /* We start looking from dest_bb; for the non-PEELED case we don't want to
+ move any stores already present, but we do want to read and validate the
+ loads. */
basic_block bb = dest_bb;
+ /* In the peeled case we need to check all the loads in the loop since to
+ move the stores we lift them over all loads into the latch. */
+ bool check_deps = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
+
do
{
- /* If the destination block is also the header then we have nothing to do. */
- if (!single_pred_p (bb))
- continue;
-
- bb = single_pred (bb);
gimple_stmt_iterator gsi = gsi_last_bb (bb);
/* Now analyze all the remaining statements and try to determine which
@@ -707,42 +720,13 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
if (!dr_ref)
continue;
- /* We currently only support statically allocated objects due to
- not having first-faulting loads support or peeling for
- alignment support. Compute the size of the referenced object
- (it could be dynamically allocated). */
- tree obj = DR_BASE_ADDRESS (dr_ref);
- if (!obj || TREE_CODE (obj) != ADDR_EXPR)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "early breaks only supported on statically"
- " allocated objects.\n");
- return opt_result::failure_at (stmt,
- "can't safely apply code motion to "
- "dependencies of %G to vectorize "
- "the early exit.\n", stmt);
- }
-
- tree refop = TREE_OPERAND (obj, 0);
- tree refbase = get_base_address (refop);
- if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
- || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "early breaks only supported on"
- " statically allocated objects.\n");
- return opt_result::failure_at (stmt,
- "can't safely apply code motion to "
- "dependencies of %G to vectorize "
- "the early exit.\n", stmt);
- }
-
/* Check if vector accesses to the object will be within bounds.
must be a constant or assume loop will be versioned or niters
- bounded by VF so accesses are within range. */
- if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
+ bounded by VF so accesses are within range. We only need to check the
+ reads since writes are moved to a safe place where, if we get there, we
+ know they are safe to perform. */
+ if (DR_IS_READ (dr_ref)
+ && !ref_within_array_bound (stmt, DR_REF (dr_ref)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -755,6 +739,9 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
"the early exit.\n", stmt);
}
+ if (!check_deps)
+ continue;
+
if (DR_IS_READ (dr_ref))
bases.safe_push (dr_ref);
else if (DR_IS_WRITE (dr_ref))
@@ -814,8 +801,19 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
"marked statement for vUSE update: %G", stmt);
}
}
+
+ if (!single_pred_p (bb))
+ {
+ gcc_assert (bb == loop->header);
+ break;
+ }
+
+ /* For the non-PEELED case we don't want to check the loads in the IV exit block
+ for dependencies with the stores, but any block preceding it we do. */
+ check_deps = true;
+ bb = single_pred (bb);
}
- while (bb != loop->header);
+ while (1);
/* We don't allow outer -> inner loop transitions which should have been
trapped already during loop form analysis. */
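As a side note, a minimal sketch (illustrative only, not taken from the patch or its testsuite; the function names below are made up) of the asymmetry the DR_IS_READ check relies on: the load runs speculatively for every lane of a vector iteration before the break is decided, while the store is sunk to a block that only executes when no lane broke.

/* Illustrative sketch.  With VF lanes, s[i..i+VF-1] is loaded before the
   break condition is evaluated, so the load must be provably within the
   object.  The store a[i] = 1 is moved by the vectorizer past the break
   to the block reached only when the whole vector iteration completes,
   so it needs no bounds check.  */
unsigned long
count_until_zero (const char *s, int *a, unsigned long n)
{
  unsigned long i;
  for (i = 0; i < n; i++)
    {
      if (s[i] == 0)	/* early break */
	break;
      a[i] = 1;		/* store sunk to the safe block */
    }
  return i;
}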
On 2/1/24 22:33, Tamar Christina wrote:
> Bootstrapped Regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu no issues.
> Also checked both with --enable-lto --with-build-config='bootstrap-O3 bootstrap-lto' --enable-multilib
> and --enable-lto --with-build-config=bootstrap-O3 --enable-checking=release,yes,rtl,extra;
> and checked the libcrypt testsuite as reported on PR113467.
Note that I still run into problems when bootstrapping with
--with-build-config=bootstrap-O3
(https://gcc.gnu.org/pipermail/gcc-testresults/2024-February/806840.html),
but the failure is not visible in those test results.
That is because it happens in the test suite of gmp, which I build
locally as part of the build.
It *is* visible in the full log of the bootstrap:
toon@moene:~/compilers$ grep ^FAIL log-thunderx-r14-8681
FAIL: libphobos.exceptions/rt_trap_exceptions.d output pattern test
FAIL: tacos
FAIL: tacosh
FAIL: tadd_si
FAIL: tadd_ui
FAIL: targ
FAIL: tasin
FAIL: tasinh
FAIL: tatan
FAIL: tatanh
FAIL: tcos
FAIL: tcosh
....
I had to kill quite a few of these "t" tests by hand because they hang.
With a standard bootstrap I do not have that problem.
Kind regards,
> On 01.02.2024 at 22:34, Tamar Christina <tamar.christina@arm.com> wrote:
>
>
>>
>>>
>>> If the above is correct then I think I understand what you're saying and
>>> will update the patch and do some Checks.
>>
>> Yes, I think that's what I wanted to say.
>>
>
> As discussed:
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu no issues.
> Also checked both with --enable-lto --with-build-config='bootstrap-O3 bootstrap-lto' --enable-multilib
> and --enable-lto --with-build-config=bootstrap-O3 --enable-checking=release,yes,rtl,extra;
> and checked the libcrypt testsuite as reported on PR113467.
>
> Ok for master?
Ok
Richard
> Thanks,
> Tamar
Toon Moene <toon@moene.org> writes:
> On 2/1/24 22:33, Tamar Christina wrote:
>
>> Bootstrapped Regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu no issues.
>> Also checked both with --enable-lto --with-build-config='bootstrap-O3 bootstrap-lto' --enable-multilib
>> and --enable-lto --with-build-config=bootstrap-O3 --enable-checking=release,yes,rtl,extra;
>> and checked the libcrypt testsuite as reported on PR113467.
>
> Note that I still run into problems if bootstrapping
> --with-build-config=bootstrap-O3
> (https://gcc.gnu.org/pipermail/gcc-testresults/2024-February/806840.html),
> but it is not visible.
>
> That is because it happens in the test suite of gmp, which I build
> locally as part of the build.
>
> It *is* visible in the full log of the bootstrap:
Can you file a bug please? The GMP test suite passes for me. It sounds
like it _might_ be PR113576?
> [...]
thanks,
sam