PR112380: Defend against CLOBBERs in RTX expressions in combine.cc

Message ID 015801da15bf$6147e750$23d7b5f0$@nextmovesoftware.com
State: Corrupt patch
Checks: snail/gcc-patch-check: warning (Git am fail log)

Commit Message

Roger Sayle Nov. 12, 2023, 11:24 p.m. UTC
  This patch addresses PR rtl-optimization/112380, an ICE-on-valid regression
where a (clobber (const_int 0)) encounters a sanity-checking gcc_assert
(at line 7554) in simplify-rtx.cc.  These CLOBBERs are used internally
by GCC's combine pass much like error_mark_node is used by various
language front-ends.
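
For reference, these placeholders are produced by combine's failure
paths with the existing idiom (shown here only as a sketch for
context, not as part of this patch):

  /* Combination/simplification failed; return the placeholder rtx.  */
  return gen_rtx_CLOBBER (mode, const0_rtx); /* i.e. (clobber (const_int 0)) */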

The possible solutions are either to handle/accept these CLOBBERs
throughout (or in more places in) the middle-end's RTL optimizers,
including the functions in simplify-rtx.cc that are used by passes other
than combine, or to prevent these CLOBBERs from escaping from try_combine
into the RTX/RTL stream.  The benefit of the second approach is that it
actually allows for better optimization: when try_combine fails to
simplify an expression, noticing the CLOBBER (rather than substituting it
merely so that the instruction pattern won't be recognized) often allows
combine to attempt alternate simplifications/transformations, looking for
ones that can be recognized.
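
As a concrete illustration, the defensive shape of the second approach
is simply the following (a minimal sketch; the real hunks below use the
modes and operands of each individual call site):

  rtx tmp = gen_lowpart (compute_mode, SET_SRC (x));
  if (!tmp || GET_CODE (tmp) == CLOBBER)
    break;  /* Punt rather than let the CLOBBER escape into the stream.  */
  /* ... otherwise continue, using tmp in the rebuilt expression.  */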

This patch is provided as two alternatives.  The first is the minimal
fix that addresses the CLOBBER encountered in the bugzilla PR.  Assuming
this approach is the correct fix for a latent bug/liability throughout
combine.cc, the second alternative fixes many of the places that may
potentially trigger problems in the future, and allows combine to attempt
more valid combinations/transformations.  These places were identified
proactively by changing the "fail:" case in gen_lowpart_for_combine to
return NULL_RTX, and working through enough of the fallout for x86_64 to
bootstrap and regression test without new failures.
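
For the record, that audit amounted to a temporary hack of roughly this
shape (a sketch of the diagnostic change described above, not part of
either alternative; the mode argument of the original return is elided):

  /* In gen_lowpart_for_combine, for auditing only.  */
  fail:
    /* Was: return gen_rtx_CLOBBER (..., const0_rtx);  */
    return NULL_RTX;  /* Callers unprepared for failure now fail fast.  */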

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2023-11-12  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        PR rtl-optimization/112380
        * combine.cc (expand_field_assignment): Check whether gen_lowpart
        returned a CLOBBER, and if so avoid calling simplify_gen_binary
        with it.

gcc/testsuite/ChangeLog
        PR rtl-optimization/112380
        * gcc.dg/pr112380.c: New test case.

gcc/ChangeLog
        PR rtl-optimization/112380
        * combine.cc (find_split_point): Check if gen_lowpart returned
        a CLOBBER.
        (subst): Check if combine_simplify_rtx returned a CLOBBER.
        (simplify_set): Check if force_to_mode returned a CLOBBER.
        Check if gen_lowpart returned a CLOBBER.
        (expand_field_assignment): Likewise.
        (make_extraction): Check if force_to_mode returned a CLOBBER.
        (force_int_to_mode): Likewise.
        (simplify_and_const_int_1): Check whether VAROP is NULL_RTX or a
        CLOBBER both before and after the call to force_to_mode.
        (simplify_comparison): Check if force_to_mode returned a CLOBBER.
        Check if gen_lowpart returned a CLOBBER.

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 6344cd3..969eb9d 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -5157,36 +5157,37 @@ find_split_point (rtx *loc, rtx_insn *insn, bool set_src)
 	     always at least get 8-bit constants in an AND insn, which is
 	     true for every current RISC.  */
 
-	  if (unsignedp && len <= 8)
+	  rtx lowpart = gen_lowpart (mode, inner);
+	  if (lowpart && GET_CODE (lowpart) != CLOBBER)
 	    {
-	      unsigned HOST_WIDE_INT mask
-		= (HOST_WIDE_INT_1U << len) - 1;
-	      rtx pos_rtx = gen_int_shift_amount (mode, pos);
-	      SUBST (SET_SRC (x),
-		     gen_rtx_AND (mode,
-				  gen_rtx_LSHIFTRT
-				  (mode, gen_lowpart (mode, inner), pos_rtx),
-				  gen_int_mode (mask, mode)));
-
-	      split = find_split_point (&SET_SRC (x), insn, true);
-	      if (split && split != &SET_SRC (x))
-		return split;
-	    }
-	  else
-	    {
-	      int left_bits = GET_MODE_PRECISION (mode) - len - pos;
-	      int right_bits = GET_MODE_PRECISION (mode) - len;
-	      SUBST (SET_SRC (x),
-		     gen_rtx_fmt_ee
-		     (unsignedp ? LSHIFTRT : ASHIFTRT, mode,
-		      gen_rtx_ASHIFT (mode,
-				      gen_lowpart (mode, inner),
-				      gen_int_shift_amount (mode, left_bits)),
-		      gen_int_shift_amount (mode, right_bits)));
-
-	      split = find_split_point (&SET_SRC (x), insn, true);
-	      if (split && split != &SET_SRC (x))
-		return split;
+	      if (unsignedp && len <= 8)
+		{
+		  unsigned HOST_WIDE_INT mask
+		    = (HOST_WIDE_INT_1U << len) - 1;
+		  rtx pos_rtx = gen_int_shift_amount (mode, pos);
+		  SUBST (SET_SRC (x),
+			 gen_rtx_AND (mode,
+				      gen_rtx_LSHIFTRT (mode, lowpart, pos_rtx),
+				      gen_int_mode (mask, mode)));
+
+		  split = find_split_point (&SET_SRC (x), insn, true);
+		  if (split && split != &SET_SRC (x))
+		    return split;
+		}
+	      else
+		{
+		  int lbits = GET_MODE_PRECISION (mode) - len - pos;
+		  int rbits = GET_MODE_PRECISION (mode) - len;
+		  SUBST (SET_SRC (x),
+			 gen_rtx_fmt_ee
+			   (unsignedp ? LSHIFTRT : ASHIFTRT, mode,
+			    gen_rtx_ASHIFT (mode, lowpart,
+					    gen_int_shift_amount (mode, lbits)),
+			    gen_int_shift_amount (mode, rbits)));
+		  split = find_split_point (&SET_SRC (x), insn, true);
+		  if (split && split != &SET_SRC (x))
+		    return split;
+		}
 	    }
 	}
 
@@ -5606,7 +5607,12 @@ subst (rtx x, rtx from, rtx to, bool in_dest, bool in_cond, bool unique_copy)
       /* If X is sufficiently simple, don't bother trying to do anything
 	 with it.  */
       if (code != CONST_INT && code != REG && code != CLOBBER)
-	x = combine_simplify_rtx (x, op0_mode, in_dest, in_cond);
+	{
+	  rtx tmp = combine_simplify_rtx (x, op0_mode, in_dest, in_cond);
+	  if (!tmp || GET_CODE (tmp) == CLOBBER)
+	    break;
+	  x = tmp;
+	}
 
       if (GET_CODE (x) == code)
 	break;
@@ -6783,8 +6789,12 @@ simplify_set (rtx x)
 
   if (GET_MODE_CLASS (mode) == MODE_INT && HWI_COMPUTABLE_MODE_P (mode))
     {
-      src = force_to_mode (src, mode, HOST_WIDE_INT_M1U, false);
-      SUBST (SET_SRC (x), src);
+      rtx tmp = force_to_mode (src, mode, HOST_WIDE_INT_M1U, false);
+      if (tmp && GET_CODE (tmp) != CLOBBER)
+	{
+	  SUBST (SET_SRC (x), tmp);
+	  src = tmp;
+	}
     }
 
   /* If the source is a COMPARE, look for the use of the comparison result
@@ -6982,12 +6992,14 @@ simplify_set (rtx x)
 	  || (GET_CODE (dest) == SUBREG
 	      && REG_P (SUBREG_REG (dest)))))
     {
-      SUBST (SET_DEST (x),
-	     gen_lowpart (GET_MODE (SUBREG_REG (src)),
-				      dest));
-      SUBST (SET_SRC (x), SUBREG_REG (src));
-
-      src = SET_SRC (x), dest = SET_DEST (x);
+      rtx tmp = gen_lowpart (GET_MODE (SUBREG_REG (src)), dest);
+      if (tmp && GET_CODE (tmp) != CLOBBER)
+	{
+	  SUBST (SET_DEST (x), tmp);
+	  SUBST (SET_SRC (x), SUBREG_REG (src));
+	  src = SET_SRC (x);
+	  dest = tmp;
+	}
     }
 
   /* If we have (set FOO (subreg:M (mem:N BAR) 0)) with M wider than N, this
@@ -7424,17 +7436,18 @@ expand_field_assignment (const_rtx x)
 	    }
 	}
 
-      /* If the destination is a subreg that overwrites the whole of the inner
-	 register, we can move the subreg to the source.  */
+      /* If the destination is a subreg that overwrites the whole of the
+         inner register, we can move the subreg to the source.  */
       else if (GET_CODE (SET_DEST (x)) == SUBREG
 	       /* We need SUBREGs to compute nonzero_bits properly.  */
 	       && nonzero_sign_valid
 	       && !read_modify_subreg_p (SET_DEST (x)))
 	{
-	  x = gen_rtx_SET (SUBREG_REG (SET_DEST (x)),
-			   gen_lowpart
-			   (GET_MODE (SUBREG_REG (SET_DEST (x))),
-			    SET_SRC (x)));
+	  rtx tmp = gen_lowpart (GET_MODE (SUBREG_REG (SET_DEST (x))),
+				 SET_SRC (x));
+	  if (!tmp || GET_CODE (tmp) == CLOBBER)
+	    break;
+	  x = gen_rtx_SET (SUBREG_REG (SET_DEST (x)), tmp);
 	  continue;
 	}
       else
@@ -7466,6 +7479,11 @@ expand_field_assignment (const_rtx x)
       if (!targetm.scalar_mode_supported_p (compute_mode))
 	break;
 
+      /* gen_lowpart_for_combine returns CLOBBER on failure.  */
+      rtx lowpart = gen_lowpart (compute_mode, SET_SRC (x));
+      if (!lowpart || GET_CODE (lowpart) == CLOBBER)
+	break;
+
       /* Now compute the equivalent expression.  Make a copy of INNER
 	 for the SET_DEST in case it is a MEM into which we will substitute;
 	 we don't want shared RTL in that case.  */
@@ -7480,9 +7498,7 @@ expand_field_assignment (const_rtx x)
 				     inner);
       masked = simplify_gen_binary (ASHIFT, compute_mode,
 				    simplify_gen_binary (
-				      AND, compute_mode,
-				      gen_lowpart (compute_mode, SET_SRC (x)),
-				      mask),
+				      AND, compute_mode, lowpart, mask),
 				    pos);
 
       x = gen_rtx_SET (copy_rtx (inner),
@@ -7679,6 +7695,9 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
 				 ? HOST_WIDE_INT_M1U
 				 : (HOST_WIDE_INT_1U << len) - 1, false);
 
+      if (!new_rtx || GET_CODE (new_rtx) == CLOBBER)
+	return NULL_RTX;
+
       /* If this extraction is going into the destination of a SET,
 	 make a STRICT_LOW_PART unless we made a MEM.  */
 
@@ -8911,6 +8930,11 @@ force_int_to_mode (rtx x, scalar_int_mode mode, scalar_int_mode xmode,
       op0 = force_to_mode (XEXP (x, 0), mode, mask, next_select);
       op1 = force_to_mode (XEXP (x, 1), mode, mask, next_select);
 
+      if (!op0 || !op1
+	  || GET_CODE (op0) == CLOBBER
+	  || GET_CODE (op1) == CLOBBER)
+	break;
+
       /* If we ended up truncating both operands, truncate the result of the
 	 operation instead.  */
       if (GET_CODE (op0) == TRUNCATE
@@ -8956,9 +8980,11 @@ force_int_to_mode (rtx x, scalar_int_mode mode, scalar_int_mode xmode,
       else
 	mask = fuller_mask;
 
-      op0 = gen_lowpart_or_truncate (op_mode,
-				     force_to_mode (XEXP (x, 0), mode,
-						    mask, next_select));
+      op0 = force_to_mode (XEXP (x, 0), mode, mask, next_select);
+      if (!op0 || GET_CODE (op0) == CLOBBER)
+	break;
+
+      op0 = gen_lowpart_or_truncate (op_mode, op0);
 
       if (op_mode != xmode || op0 != XEXP (x, 0))
 	{
@@ -9166,13 +9192,15 @@ force_int_to_mode (rtx x, scalar_int_mode mode, scalar_int_mode xmode,
       mask = fuller_mask;
 
     unop:
-      op0 = gen_lowpart_or_truncate (op_mode,
-				     force_to_mode (XEXP (x, 0), mode, mask,
-						    next_select));
-      if (op_mode != xmode || op0 != XEXP (x, 0))
+      op0 = force_to_mode (XEXP (x, 0), mode, mask, next_select);
+      if (op0 && GET_CODE (op0) != CLOBBER)
 	{
-	  x = simplify_gen_unary (code, op_mode, op0, op_mode);
-	  xmode = op_mode;
+	  op0 = gen_lowpart_or_truncate (op_mode, op0);
+	  if (op_mode != xmode || op0 != XEXP (x, 0))
+	    {
+	      x = simplify_gen_unary (code, op_mode, op0, op_mode);
+	      xmode = op_mode;
+	    }
 	}
       break;
 
@@ -10056,7 +10084,7 @@ simplify_and_const_int_1 (scalar_int_mode mode, rtx varop,
 
   orig_varop = varop;
   orig_constop = constop;
-  if (GET_CODE (varop) == CLOBBER)
+  if (!varop || GET_CODE (varop) == CLOBBER)
     return NULL_RTX;
 
   /* Simplify VAROP knowing that we will be only looking at some of the
@@ -10069,7 +10097,7 @@ simplify_and_const_int_1 (scalar_int_mode mode, rtx varop,
   varop = force_to_mode (varop, mode, constop, false);
 
   /* If VAROP is a CLOBBER, we will fail so return it.  */
-  if (GET_CODE (varop) == CLOBBER)
+  if (!varop || GET_CODE (varop) == CLOBBER)
     return varop;
 
   /* If VAROP is a CONST_INT, then we need to apply the mask in CONSTOP
@@ -12272,9 +12300,14 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
       if (sign_bit_comparison_p
 	  && is_a <scalar_int_mode> (raw_mode, &int_mode)
 	  && HWI_COMPUTABLE_MODE_P (int_mode))
-	op0 = force_to_mode (op0, int_mode,
-			     HOST_WIDE_INT_1U
-			     << (GET_MODE_PRECISION (int_mode) - 1), false);
+	{
+	  rtx tmp = force_to_mode (op0, int_mode,
+				   HOST_WIDE_INT_1U
+				   << (GET_MODE_PRECISION (int_mode) - 1),
+				   false);
+	  if (tmp && GET_CODE (tmp) != CLOBBER)
+	    op0 = tmp;
+	}
 
       if (COMPARISON_P (op0))
 	{
@@ -13052,8 +13085,12 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
 	{
 	  tem = gen_lowpart (inner_mode, op1);
 
-	  if ((nonzero_bits (tem, inner_mode) & ~GET_MODE_MASK (mode)) == 0)
-	    op0 = SUBREG_REG (op0), op1 = tem;
+	  if (tem && GET_CODE (tem) != CLOBBER
+	      && (nonzero_bits (tem, inner_mode) & ~GET_MODE_MASK (mode)) == 0)
+	    {
+	      op0 = SUBREG_REG (op0);
+	      op1 = tem;
+	    }
 	}
     }
diff --git a/gcc/testsuite/gcc.dg/pr112380.c b/gcc/testsuite/gcc.dg/pr112380.c
new file mode 100644
index 0000000..7dd7a85
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr112380.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+enum { TGSI_FILE_NULL };
+struct ureg_src {
+  unsigned File : 4;
+  unsigned : 2;
+  unsigned : 2;
+  unsigned : 2;
+  unsigned : 1;
+  unsigned IndirectFile : 4;
+  unsigned IndirectSwizzle : 2;
+  int : 16;
+  int : 6;
+  int : 16;
+  int : 16;
+  unsigned : 10;
+} __trans_tmp_1;
+
+int ureg_src_indirect_addr_1, ntt_emit_texture_instr_sampler_handle_src;
+
+void ureg_scalar(struct ureg_src);
+
+void ntt_emit_texture_instr() {
+  struct ureg_src sampler;
+  if (ntt_emit_texture_instr_sampler_handle_src)
+    sampler = __trans_tmp_1;
+  struct ureg_src reg = sampler;
+  reg.File != TGSI_FILE_NULL;
+  reg.IndirectFile = reg.IndirectSwizzle = ureg_src_indirect_addr_1;
+  sampler = reg;
+  ureg_scalar(reg);
+}
  

Patch

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 6344cd3..f2c64a9 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -7466,6 +7466,11 @@  expand_field_assignment (const_rtx x)
       if (!targetm.scalar_mode_supported_p (compute_mode))
 	break;
 
+      /* gen_lowpart_for_combine returns CLOBBER on failure.  */
+      rtx lowpart = gen_lowpart (compute_mode, SET_SRC (x));
+      if (GET_CODE (lowpart) == CLOBBER)
+	break;
+
       /* Now compute the equivalent expression.  Make a copy of INNER
 	 for the SET_DEST in case it is a MEM into which we will substitute;
 	 we don't want shared RTL in that case.  */
@@ -7480,9 +7485,7 @@  expand_field_assignment (const_rtx x)
 				     inner);
       masked = simplify_gen_binary (ASHIFT, compute_mode,
 				    simplify_gen_binary (
-				      AND, compute_mode,
-				      gen_lowpart (compute_mode, SET_SRC (x)),
-				      mask),
+				      AND, compute_mode, lowpart, mask),
 				    pos);
 
       x = gen_rtx_SET (copy_rtx (inner),