alias-analyis: try to find ADDR_EXPR for SSA_NAME ptr

Message ID SN6PR01MB4240086D3865DD3F4F4D4F99E8E0A@SN6PR01MB4240.prod.exchangelabs.com
State Accepted
Headers
Series alias-analyis: try to find ADDR_EXPR for SSA_NAME ptr |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Di Zhao OS Aug. 28, 2023, 9:35 a.m. UTC
  This patch tries to improve alias-analysis between an SSA_NAME and
a declaration a little. For a case like:

	int array1[10], array2[10];
	ptr1 = array1 + x;
	ptr2 = ptr1 + y;

, *ptr2 should not alias with array2.

If we can't disambiguate from points-to information, this patch
tries to find a determined ADDR_EXPR following the defining
statements and then disambiguate by compare_base_decls. On spec2017
502.gcc, there are several thousands of new non-aliasing relation
found. (No obvious improvements or regressions found, though.) 

Bootstrapped and tested on aarch64-unknown-linux-gnu.

Thanks,
Di Zhao


gcc/ChangeLog:

        * tree-ssa-alias.cc (ptr_deref_may_alias_decl_p): try
        to find ADDR_EXPR for SSA_NAME ptrs.

---
 gcc/tree-ssa-alias.cc | 42 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 8 deletions(-)
  

Comments

Richard Biener Aug. 28, 2023, 9:47 a.m. UTC | #1
On Mon, Aug 28, 2023 at 11:35 AM Di Zhao OS via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch tries to improve alias-analysis between an SSA_NAME and
> a declaration a little. For a case like:
>
>         int array1[10], array2[10];
>         ptr1 = array1 + x;
>         ptr2 = ptr1 + y;
>
> , *ptr2 should not alias with array2.
>
> If we can't disambiguate from points-to information, this patch
> tries to find a determined ADDR_EXPR following the defining
> statements and then disambiguate by compare_base_decls. On spec2017
> 502.gcc, there are several thousands of new non-aliasing relation
> found. (No obvious improvements or regressions found, though.)
>
> Bootstrapped and tested on aarch64-unknown-linux-gnu.
>
> Thanks,
> Di Zhao
>
>
> gcc/ChangeLog:
>
>         * tree-ssa-alias.cc (ptr_deref_may_alias_decl_p): try
>         to find ADDR_EXPR for SSA_NAME ptrs.
>
> ---
>  gcc/tree-ssa-alias.cc | 42 ++++++++++++++++++++++++++++++++++--------
>  1 file changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
> index cf38fe506a8..a6fe1e7b227 100644
> --- a/gcc/tree-ssa-alias.cc
> +++ b/gcc/tree-ssa-alias.cc
> @@ -271,6 +271,38 @@ ptr_deref_may_alias_decl_p (tree ptr, tree decl)
>        return ptr_deref_may_alias_decl_p (ptr, decl);
>      }
>
> +  if (TREE_CODE (ptr) == SSA_NAME)
> +    {
> +      /* First disambiguate from points-to information.  */
> +      pi = SSA_NAME_PTR_INFO (ptr);
> +      if (pi && !pt_solution_includes (&pi->pt, decl))
> +       return false;
> +
> +      /* Try to find an ADDR_EXPR by walking the defining statements, so we can
> +        probably disambiguate from compare_base_decls.  */
> +      gimple *def_stmt;
> +      while ((def_stmt = SSA_NAME_DEF_STMT (ptr)))
> +       {
> +         if (is_gimple_assign (def_stmt)
> +             && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR)

This is going to very badly regress compile-time with long chains of
pointer adjustments so not really appropriate.

What are the sources of missed points-to information here?  Sometimes
transforms unnecessarily drop or forget to propagate info.

What are the passes that actually benefit most from this change?  We could
implement a light-weight points-to info "forwarding" pass for example.  Iff,
then your patch should update points-to info on the stmts it visits during
this walk to make future walks cheaper - and IMHO it definitely would need
an upper bound on the number of stmts walked.  Something like

  auto_vec<tree, 8> ptrs;
  while (..)
    {
       result = check alias;
       ptrs.quick_push (ptr);
    }
  update points-to-info of ptrs;
  return result;

I'll also note that other APIs have very much the same issue
(you can look for POINTER_PLUS_EXPR walks of GENERIC
arguments), splitting out the POINTER_PLUS_EXPR handling
would be good.

But first I'd like to have the first questions answered.

> +           {
> +             ptr = gimple_assign_rhs1 (def_stmt);
> +             if (TREE_CODE (ptr) != SSA_NAME)
> +               break;
> +           }
> +         /* See if we can find a certain defining source.  */
> +         else if (gimple_code (def_stmt) == GIMPLE_PHI
> +                  && gimple_phi_num_args (def_stmt) == 1)
> +           {
> +             ptr = PHI_ARG_DEF (def_stmt, 0);
> +             if (TREE_CODE (ptr) != SSA_NAME)
> +               break;
> +           }
> +         else
> +           break;
> +       }
> +    }
> +
>    /* ADDR_EXPR pointers either just offset another pointer or directly
>       specify the pointed-to set.  */
>    if (TREE_CODE (ptr) == ADDR_EXPR)
> @@ -279,7 +311,7 @@ ptr_deref_may_alias_decl_p (tree ptr, tree decl)
>        if (base
>           && (TREE_CODE (base) == MEM_REF
>               || TREE_CODE (base) == TARGET_MEM_REF))
> -       ptr = TREE_OPERAND (base, 0);
> +       return ptr_deref_may_alias_decl_p (TREE_OPERAND (base, 0), decl);
>        else if (base
>                && DECL_P (base))
>         return compare_base_decls (base, decl) != 0;
> @@ -294,13 +326,7 @@ ptr_deref_may_alias_decl_p (tree ptr, tree decl)
>    if (!may_be_aliased (decl))
>      return false;
>
> -  /* If we do not have useful points-to information for this pointer
> -     we cannot disambiguate anything else.  */
> -  pi = SSA_NAME_PTR_INFO (ptr);
> -  if (!pi)
> -    return true;
> -
> -  return pt_solution_includes (&pi->pt, decl);
> +  return true;
>  }
>
>  /* Return true if dereferenced PTR1 and PTR2 may alias.
> --
> 2.25.1
  

Patch

diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index cf38fe506a8..a6fe1e7b227 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -271,6 +271,38 @@  ptr_deref_may_alias_decl_p (tree ptr, tree decl)
       return ptr_deref_may_alias_decl_p (ptr, decl);
     }
 
+  if (TREE_CODE (ptr) == SSA_NAME)
+    {
+      /* First disambiguate from points-to information.  */
+      pi = SSA_NAME_PTR_INFO (ptr);
+      if (pi && !pt_solution_includes (&pi->pt, decl))
+	return false;
+
+      /* Try to find an ADDR_EXPR by walking the defining statements, so we can
+	 probably disambiguate from compare_base_decls.  */
+      gimple *def_stmt;
+      while ((def_stmt = SSA_NAME_DEF_STMT (ptr)))
+	{
+	  if (is_gimple_assign (def_stmt)
+	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR)
+	    {
+	      ptr = gimple_assign_rhs1 (def_stmt);
+	      if (TREE_CODE (ptr) != SSA_NAME)
+		break;
+	    }
+	  /* See if we can find a certain defining source.  */
+	  else if (gimple_code (def_stmt) == GIMPLE_PHI
+		   && gimple_phi_num_args (def_stmt) == 1)
+	    {
+	      ptr = PHI_ARG_DEF (def_stmt, 0);
+	      if (TREE_CODE (ptr) != SSA_NAME)
+		break;
+	    }
+	  else
+	    break;
+	}
+    }
+
   /* ADDR_EXPR pointers either just offset another pointer or directly
      specify the pointed-to set.  */
   if (TREE_CODE (ptr) == ADDR_EXPR)
@@ -279,7 +311,7 @@  ptr_deref_may_alias_decl_p (tree ptr, tree decl)
       if (base
 	  && (TREE_CODE (base) == MEM_REF
 	      || TREE_CODE (base) == TARGET_MEM_REF))
-	ptr = TREE_OPERAND (base, 0);
+	return ptr_deref_may_alias_decl_p (TREE_OPERAND (base, 0), decl);
       else if (base
 	       && DECL_P (base))
 	return compare_base_decls (base, decl) != 0;
@@ -294,13 +326,7 @@  ptr_deref_may_alias_decl_p (tree ptr, tree decl)
   if (!may_be_aliased (decl))
     return false;
 
-  /* If we do not have useful points-to information for this pointer
-     we cannot disambiguate anything else.  */
-  pi = SSA_NAME_PTR_INFO (ptr);
-  if (!pi)
-    return true;
-
-  return pt_solution_includes (&pi->pt, decl);
+  return true;
 }
 
 /* Return true if dereferenced PTR1 and PTR2 may alias.