[v2] aarch64: Fix eh_return for -mtrack-speculation [PR112987]

Message ID 20240125135854.643376-1-szabolcs.nagy@arm.com
State Unresolved
Series [v2] aarch64: Fix eh_return for -mtrack-speculation [PR112987]

Checks

Context                  Check     Description
snail/gcc-patch-check    warning   Git am fail log

Commit Message

Szabolcs Nagy Jan. 25, 2024, 1:58 p.m. UTC
A recent commit introduced a conditional branch in eh_return epilogues
that is not compatible with speculation tracking:

  commit 426fddcbdad6746fe70e031f707fb07f55dfb405
  Author:     Szabolcs Nagy <szabolcs.nagy@arm.com>
  CommitDate: 2023-11-27 15:52:48 +0000

  aarch64: Use br instead of ret for eh_return

Refactor the compare-zero-and-branch pattern into a new helper and use
it to fix the issue.

gcc/ChangeLog:

	PR target/112987
	* config/aarch64/aarch64.cc (aarch64_gen_compare_zero_and_branch): New.
	(aarch64_expand_epilogue): Use the new function.
	(aarch64_split_compare_and_swap): Likewise.
	(aarch64_split_atomic_op): Likewise.
---
v2: factor out aarch64_gen_compare_zero_and_branch

 gcc/config/aarch64/aarch64.cc | 75 +++++++++++++++--------------------
 1 file changed, 32 insertions(+), 43 deletions(-)
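
For context, an editorial sketch (not part of the patch): with
-mtrack-speculation a conditional branch has to test the condition codes
set by an explicit compare so that the speculation-tracking pass can
instrument it, whereas a direct test of a register matches the
compare-and-branch pattern and assembles to a single cbz/cbnz, which sets
no flags.  The old eh_return epilogue built the direct form
unconditionally.  A minimal sketch of the two condition forms, using
GCC's RTL builders; the register x and the surrounding declarations are
placeholders:

  /* Without -mtrack-speculation: test the register directly.  This is
     matched as a compare-and-branch and assembles to a single cbz.  */
  rtx cond = gen_rtx_EQ (VOIDmode, x, const0_rtx);

  /* With -mtrack-speculation: emit an explicit compare that sets the
     condition codes, then branch on the CC register (cmp + b.eq), which
     the speculation-tracking pass can instrument.  */
  rtx cc_reg = aarch64_gen_compare_reg (EQ, x, const0_rtx);
  rtx cond_cc = gen_rtx_EQ (GET_MODE (cc_reg), cc_reg, const0_rtx);

The patch below factors this choice into
aarch64_gen_compare_zero_and_branch and routes the eh_return epilogue
through it, as the atomic splitters already did.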
  

Comments

Richard Sandiford Jan. 25, 2024, 2:26 p.m. UTC | #1
Szabolcs Nagy <szabolcs.nagy@arm.com> writes:
> A recent commit introduced a conditional branch in eh_return epilogues
> that is not compatible with speculation tracking:
>
>   commit 426fddcbdad6746fe70e031f707fb07f55dfb405
>   Author:     Szabolcs Nagy <szabolcs.nagy@arm.com>
>   CommitDate: 2023-11-27 15:52:48 +0000
>
>   aarch64: Use br instead of ret for eh_return
>
> Refactor the compare-zero-and-branch pattern into a new helper and use
> it to fix the issue.
>
> gcc/ChangeLog:
>
> 	PR target/112987
> 	* config/aarch64/aarch64.cc (aarch64_gen_compare_zero_and_branch): New.
> 	(aarch64_expand_epilogue): Use the new function.
> 	(aarch64_split_compare_and_swap): Likewise.
> 	(aarch64_split_atomic_op): Likewise.

OK, thanks.

Richard


Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3d6dd98c5c5..d2014ce1527 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2637,6 +2637,28 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
   return aarch64_gen_compare_reg (code, x, y);
 }
 
+/* Generate conditional branch to LABEL, comparing X to 0 using CODE.
+   Return the jump instruction.  */
+
+static rtx
+aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
+				     rtx_code_label *label)
+{
+  if (aarch64_track_speculation)
+    {
+      /* Emit an explicit compare instruction, so that we can correctly
+	 track the condition codes.  */
+      rtx cc_reg = aarch64_gen_compare_reg (code, x, const0_rtx);
+      x = gen_rtx_fmt_ee (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
+    }
+  else
+    x = gen_rtx_fmt_ee (code, VOIDmode, x, const0_rtx);
+
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
+  return gen_rtx_SET (pc_rtx, x);
+}
+
 /* Consider the operation:
 
      OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3]
@@ -9882,11 +9904,10 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
 	 to be SP; letting the CFA move during this adjustment
 	 is just as correct as retaining the CFA from the body
 	 of the function.  Therefore, do nothing special.  */
-      rtx label = gen_label_rtx ();
-      rtx x = gen_rtx_EQ (VOIDmode, EH_RETURN_TAKEN_RTX, const0_rtx);
-      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
-				gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
-      rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+      rtx_code_label *label = gen_label_rtx ();
+      rtx x = aarch64_gen_compare_zero_and_branch (EQ, EH_RETURN_TAKEN_RTX,
+						   label);
+      rtx jump = emit_jump_insn (x);
       JUMP_LABEL (jump) = label;
       LABEL_NUSES (label)++;
       emit_insn (gen_add2_insn (stack_pointer_rtx,
@@ -24657,19 +24678,8 @@ aarch64_split_compare_and_swap (rtx operands[])
 
   if (!is_weak)
     {
-      if (aarch64_track_speculation)
-	{
-	  /* Emit an explicit compare instruction, so that we can correctly
-	     track the condition codes.  */
-	  rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
-	  x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
-	}
-      else
-	x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
-
-      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
-				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
-      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+      x = aarch64_gen_compare_zero_and_branch (NE, scratch, label1);
+      aarch64_emit_unlikely_jump (x);
     }
   else
     aarch64_gen_compare_reg (NE, scratch, const0_rtx);
@@ -24685,18 +24695,8 @@ aarch64_split_compare_and_swap (rtx operands[])
       emit_label (label2);
       aarch64_emit_store_exclusive (mode, scratch, mem, rval, model_rtx);
 
-      if (aarch64_track_speculation)
-	{
-	  /* Emit an explicit compare instruction, so that we can correctly
-	     track the condition codes.  */
-	  rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
-	  x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
-	}
-      else
-	x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
-      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
-				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
-      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+      x = aarch64_gen_compare_zero_and_branch (NE, scratch, label1);
+      aarch64_emit_unlikely_jump (x);
 
       label2 = label3;
     }
@@ -24780,19 +24780,8 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
   aarch64_emit_store_exclusive (mode, cond, mem,
 				gen_lowpart (mode, new_out), model_rtx);
 
-  if (aarch64_track_speculation)
-    {
-      /* Emit an explicit compare instruction, so that we can correctly
-	 track the condition codes.  */
-      rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
-      x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
-    }
-  else
-    x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
-
-  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
-			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
-  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+  x = aarch64_gen_compare_zero_and_branch (NE, cond, label);
+  aarch64_emit_unlikely_jump (x);
 
   /* Emit any final barrier needed for a __sync operation.  */
   if (is_sync)
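
As a usage note, a minimal caller sketch modelled on the epilogue hunk
above; the register reg and the code around the branch are illustrative
placeholders, not part of the patch:

  /* Branch to LABEL when REG is zero, in a form that remains valid under
     -mtrack-speculation; fall through when REG is nonzero.  */
  rtx_code_label *label = gen_label_rtx ();
  rtx branch = aarch64_gen_compare_zero_and_branch (EQ, reg, label);
  rtx jump = emit_jump_insn (branch);
  JUMP_LABEL (jump) = label;
  LABEL_NUSES (label)++;
  /* ... insns emitted here execute only when REG is nonzero ...  */
  emit_label (label);

Callers that want the branch marked as unlikely can instead pass the
returned pattern to aarch64_emit_unlikely_jump, as the atomic splitters
in the patch do.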