[v2,04/11] Native complex ops: Allow native complex regs and ops in rtl

Message ID 20230912100713.1074-5-snoiry@kalrayinc.com
State Unresolved
Headers
Series Native complex operations |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Sylvain Noiry Sept. 12, 2023, 10:07 a.m. UTC
  Summary:
Support registers of complex types in rtl. Also adapt the functions
called during the expand pass to support native complex operations.

gcc/ChangeLog:

	* explow.cc (trunc_int_for_mode): Allow complex int modes
	* expr.cc (emit_move_complex_parts): Move both parts at the
	same time if it is supported by the backend
	(emit_move_complex): Do not move via integer if no int mode
	corresponds.  For complex floats, relax the constraint on the
	number of registers for targets with pairs of registers, and
	use native moves if they are supported by the backend.
	(expand_expr_real_2): Move both parts at the same time if it
	is supported by the backend
	(expand_expr_real_1): Update the expansion of complex constants.
	(const_vector_from_tree): Add the expansion of both parts of a
	complex constant.
	* real.h (FLOAT_MODE_FORMAT): Update to handle complex float modes.
	* machmode.h (COMPLEX_INT_MODE_P, COMPLEX_FLOAT_MODE_P): New
	predicates.
	* optabs-libfuncs.cc (gen_int_libfunc): Add support for
	complex modes
	(gen_intv_fp_libfunc): Likewise
	* recog.cc (general_operand): Likewise
	* cse.cc (try_const_anchors): Likewise
	* emit-rtl.cc (validate_subreg): Likewise.
---
 gcc/cse.cc               |  2 +-
 gcc/doc/tm.texi          |  2 +-
 gcc/emit-rtl.cc          |  2 +-
 gcc/explow.cc            |  2 +-
 gcc/expr.cc              | 70 ++++++++++++++++++++++++++++++++++------
 gcc/internal-fn.cc       |  4 +--
 gcc/machmode.h           |  8 +++++
 gcc/optabs-libfuncs.cc   | 25 ++++++++++----
 gcc/real.h               |  3 +-
 gcc/recog.cc             |  1 +
 gcc/target.def           |  2 +-
 gcc/targhooks.cc         |  8 ++---
 gcc/targhooks.h          |  3 +-
 gcc/tree-ssa-forwprop.cc |  1 +
 14 files changed, 105 insertions(+), 28 deletions(-)
  

Patch

diff --git a/gcc/cse.cc b/gcc/cse.cc
index c46870059e6..5ce6c692070 100644
--- a/gcc/cse.cc
+++ b/gcc/cse.cc
@@ -1313,7 +1313,7 @@  try_const_anchors (rtx src_const, machine_mode mode)
   unsigned lower_old, upper_old;
 
   /* CONST_INT may be in various modes, avoid non-scalar-int mode. */
-  if (!SCALAR_INT_MODE_P (mode))
+  if (!(SCALAR_INT_MODE_P (mode) || COMPLEX_INT_MODE_P (mode)))
     return NULL_RTX;
 
   if (!compute_const_anchors (src_const, &lower_base, &lower_offs,
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 470497a3ade..1e87f798449 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4631,7 +4631,7 @@  This hook should return the rtx representing the specified @var{part} of the com
   @var{part} can be the real part, the imaginary part, or both of them.
 @end deftypefn
 
-@deftypefn {Target Hook} void TARGET_WRITE_COMPLEX_PART (rtx @var{cplx}, rtx @var{val}, complex_part_t @var{part})
+@deftypefn {Target Hook} void TARGET_WRITE_COMPLEX_PART (rtx @var{cplx}, rtx @var{val}, complex_part_t @var{part}, bool @var{undefined_p})
 This hook should move the rtx value given by @var{val} to the specified @var{var} of the complex given by @var{cplx}.
   @var{var} can be the real part, the imaginary part, or both of them.
 @end deftypefn
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index 22012bfea13..f7c33c4afb1 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -946,7 +946,7 @@  validate_subreg (machine_mode omode, machine_mode imode,
      if this ought to be represented at all -- why can't this all be hidden
      in post-reload splitters that make arbitrarily mode changes to the
      registers themselves.  */
-  else if (VECTOR_MODE_P (omode)
+  else if ((VECTOR_MODE_P (omode) || COMPLEX_MODE_P (omode))
 	   && GET_MODE_INNER (omode) == GET_MODE_INNER (imode))
     ;
   /* Subregs involving floating point modes are not allowed to
diff --git a/gcc/explow.cc b/gcc/explow.cc
index 6424c0802f0..48572a40eab 100644
--- a/gcc/explow.cc
+++ b/gcc/explow.cc
@@ -56,7 +56,7 @@  trunc_int_for_mode (HOST_WIDE_INT c, machine_mode mode)
   int width = GET_MODE_PRECISION (smode);
 
   /* You want to truncate to a _what_?  */
-  gcc_assert (SCALAR_INT_MODE_P (mode));
+  gcc_assert (SCALAR_INT_MODE_P (mode) || COMPLEX_INT_MODE_P (mode));
 
   /* Canonicalize BImode to 0 and STORE_FLAG_VALUE.  */
   if (smode == BImode)
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 12b74273144..01462486631 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -3842,8 +3842,14 @@  emit_move_complex_parts (rtx x, rtx y)
       && REG_P (x) && !reg_overlap_mentioned_p (x, y))
     emit_clobber (x);
 
-  write_complex_part (x, read_complex_part (y, REAL_P), REAL_P, true);
-  write_complex_part (x, read_complex_part (y, IMAG_P), IMAG_P, false);
+  machine_mode mode = GET_MODE (x);
+  if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
+    write_complex_part (x, read_complex_part (y, BOTH_P), BOTH_P, true);
+  else
+    {
+      write_complex_part (x, read_complex_part (y, REAL_P), REAL_P, true);
+      write_complex_part (x, read_complex_part (y, IMAG_P), IMAG_P, false);
+    }
 
   return get_last_insn ();
 }
@@ -3863,14 +3869,14 @@  emit_move_complex (machine_mode mode, rtx x, rtx y)
 
   /* See if we can coerce the target into moving both values at once, except
      for floating point where we favor moving as parts if this is easy.  */
-  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
+  scalar_int_mode imode;
+  if (!int_mode_for_mode (mode).exists (&imode))
+    try_int = false;
+  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
       && optab_handler (mov_optab, GET_MODE_INNER (mode)) != CODE_FOR_nothing
-      && !(REG_P (x)
-	   && HARD_REGISTER_P (x)
-	   && REG_NREGS (x) == 1)
-      && !(REG_P (y)
-	   && HARD_REGISTER_P (y)
-	   && REG_NREGS (y) == 1))
+      && optab_handler (mov_optab, mode) != CODE_FOR_nothing
+      && !(REG_P (x) && HARD_REGISTER_P (x))
+      && !(REG_P (y) && HARD_REGISTER_P (y)))
     try_int = false;
   /* Not possible if the values are inherently not adjacent.  */
   else if (GET_CODE (x) == CONCAT || GET_CODE (y) == CONCAT)
@@ -11044,6 +11050,48 @@  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 
 	  return original_target;
 	}
+      else if (original_target && (GET_CODE (original_target) == REG)
+	       &&
+	       ((GET_MODE_CLASS (GET_MODE (original_target)) ==
+		 MODE_COMPLEX_INT)
+		|| (GET_MODE_CLASS (GET_MODE (original_target)) ==
+		    MODE_COMPLEX_FLOAT)))
+	{
+	  mode = TYPE_MODE (TREE_TYPE (exp));
+
+	  /* Move both parts at the same time if it is possible.  */
+	  if (TREE_COMPLEX_BOTH_PARTS (exp) != NULL)
+	    {
+	      op0 = expand_expr (TREE_COMPLEX_BOTH_PARTS (exp),
+				 original_target, mode, EXPAND_NORMAL);
+	      write_complex_part (original_target, op0, BOTH_P, false);
+	    }
+	  else
+	    {
+	      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (exp)));
+
+	      rtx rtarg = gen_reg_rtx (mode);
+	      rtx itarg = gen_reg_rtx (mode);
+	      op0 =
+		expand_expr (TREE_REALPART (exp), rtarg, mode, EXPAND_NORMAL);
+	      op1 =
+		expand_expr (TREE_IMAGPART (exp), itarg, mode, EXPAND_NORMAL);
+
+	      write_complex_part (original_target, op0, REAL_P, true);
+	      write_complex_part (original_target, op1, IMAG_P, false);
+	    }
+	  return original_target;
+	}
+      else if ((TREE_COMPLEX_BOTH_PARTS (exp) != NULL)
+	       && (known_le (GET_MODE_BITSIZE (mode), 2 * BITS_PER_WORD)))
+	{
+	  op0 =
+	    expand_expr (TREE_COMPLEX_BOTH_PARTS (exp), original_target, mode,
+			 EXPAND_NORMAL);
+	  rtx tmp = gen_reg_rtx (mode);
+	  write_complex_part (tmp, op0, BOTH_P, false);
+	  return tmp;
+	}
 
       /* fall through */
 
@@ -13391,6 +13439,10 @@  const_vector_from_tree (tree exp)
       else if (TREE_CODE (elt) == FIXED_CST)
 	builder.quick_push (CONST_FIXED_FROM_FIXED_VALUE
 			    (TREE_FIXED_CST (elt), inner));
+      else if (TREE_CODE (elt) == COMPLEX_CST)
+	builder.quick_push (expand_expr
+			    (TREE_COMPLEX_BOTH_PARTS (elt), NULL_RTX, mode,
+			     EXPAND_NORMAL));
       else
 	builder.quick_push (immed_wide_int_const (wi::to_poly_wide (elt),
 						  inner));
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index a01b7160303..c1c8e456320 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -2878,8 +2878,8 @@  expand_UADDC (internal_fn ifn, gcall *stmt)
   create_input_operand (&ops[3], op2, mode);
   create_input_operand (&ops[4], op3, mode);
   expand_insn (icode, 5, ops);
-  write_complex_part (target, re, false, false);
-  write_complex_part (target, im, true, false);
+  write_complex_part (target, re, REAL_P, false);
+  write_complex_part (target, im, IMAG_P, false);
 }
 
 /* Expand USUBC STMT.  */
diff --git a/gcc/machmode.h b/gcc/machmode.h
index a22df60dc20..fd87af7c74a 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -119,6 +119,14 @@  extern const unsigned char mode_class[NUM_MACHINE_MODES];
    || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT \
    || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)
 
+/* Nonzero if MODE is a complex integer mode.  */
+#define COMPLEX_INT_MODE_P(MODE) \
+   (GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT)
+
+/* Nonzero if MODE is a complex floating-point mode.  */
+#define COMPLEX_FLOAT_MODE_P(MODE) \
+  (GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)
+
 /* Nonzero if MODE is a complex mode.  */
 #define COMPLEX_MODE_P(MODE)			\
   (GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT	\
diff --git a/gcc/optabs-libfuncs.cc b/gcc/optabs-libfuncs.cc
index f1abe6916d3..4bb56b2f0d5 100644
--- a/gcc/optabs-libfuncs.cc
+++ b/gcc/optabs-libfuncs.cc
@@ -190,19 +190,30 @@  gen_int_libfunc (optab optable, const char *opname, char suffix,
   int maxsize = 2 * BITS_PER_WORD;
   int minsize = BITS_PER_WORD;
   scalar_int_mode int_mode;
+  complex_mode cplx_int_mode;
+  int bitsize;
 
-  if (!is_int_mode (mode, &int_mode))
+  if (is_int_mode (mode, &int_mode))
+    bitsize = GET_MODE_BITSIZE (int_mode);
+  else if (is_complex_int_mode (mode, &cplx_int_mode))
+    bitsize = GET_MODE_BITSIZE (cplx_int_mode);
+  else
     return;
+
   if (maxsize < LONG_LONG_TYPE_SIZE)
     maxsize = LONG_LONG_TYPE_SIZE;
   if (minsize > INT_TYPE_SIZE
       && (trapv_binoptab_p (optable)
 	  || trapv_unoptab_p (optable)))
     minsize = INT_TYPE_SIZE;
-  if (GET_MODE_BITSIZE (int_mode) < minsize
-      || GET_MODE_BITSIZE (int_mode) > maxsize)
+
+  if (bitsize < minsize || bitsize > maxsize)
     return;
-  gen_libfunc (optable, opname, suffix, int_mode);
+
+  if (GET_MODE_CLASS (mode) == MODE_INT)
+    gen_libfunc (optable, opname, suffix, int_mode);
+  else
+    gen_libfunc (optable, opname, suffix, cplx_int_mode);
 }
 
 /* Like gen_libfunc, but verify that FP and set decimal prefix if needed.  */
@@ -280,9 +291,11 @@  void
 gen_intv_fp_libfunc (optab optable, const char *name, char suffix,
 		     machine_mode mode)
 {
-  if (DECIMAL_FLOAT_MODE_P (mode) || GET_MODE_CLASS (mode) == MODE_FLOAT)
+  if (DECIMAL_FLOAT_MODE_P (mode) || GET_MODE_CLASS (mode) == MODE_FLOAT
+      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
     gen_fp_libfunc (optable, name, suffix, mode);
-  if (GET_MODE_CLASS (mode) == MODE_INT)
+  if (GET_MODE_CLASS (mode) == MODE_INT
+      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
     {
       int len = strlen (name);
       char *v_name = XALLOCAVEC (char, len + 2);
diff --git a/gcc/real.h b/gcc/real.h
index 9ed6c372b14..53585418e68 100644
--- a/gcc/real.h
+++ b/gcc/real.h
@@ -189,7 +189,8 @@  extern const struct real_format *
 			: (gcc_unreachable (), 0)])
 
 #define FLOAT_MODE_FORMAT(MODE) \
-  (REAL_MODE_FORMAT (as_a <scalar_float_mode> (GET_MODE_INNER (MODE))))
+  (REAL_MODE_FORMAT (as_a <scalar_float_mode> \
+    (GET_MODE_INNER ((COMPLEX_FLOAT_MODE_P (MODE)) ? (GET_MODE_INNER (MODE)) : (MODE)))))
 
 /* The following macro determines whether the floating point format is
    composite, i.e. may contain non-consecutive mantissa bits, in which
diff --git a/gcc/recog.cc b/gcc/recog.cc
index 92f151248a6..8f53e93f566 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -1441,6 +1441,7 @@  general_operand (rtx op, machine_mode mode)
      if the caller wants something floating.  */
   if (GET_MODE (op) == VOIDmode && mode != VOIDmode
       && GET_MODE_CLASS (mode) != MODE_INT
+      && GET_MODE_CLASS (mode) != MODE_COMPLEX_INT
       && GET_MODE_CLASS (mode) != MODE_PARTIAL_INT)
     return false;
 
diff --git a/gcc/target.def b/gcc/target.def
index d63dacbbb8f..4eafff1d21b 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3338,7 +3338,7 @@  DEFHOOK
  "This hook should move the rtx value given by @var{val} to the specified @var{var} of the complex given by @var{cplx}.\n\
   @var{var} can be the real part, the imaginary part, or both of them.",
  void,
- (rtx cplx, rtx val, complex_part_t part),
+ (rtx cplx, rtx val, complex_part_t part, bool undefined_p),
  default_write_complex_part)
 
 /* Support for named address spaces.  */
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index f6e7bc6c141..d89668cd1ab 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -1634,7 +1634,7 @@  default_read_complex_part (rtx cplx, complex_part_t part)
    BOTH_P, call recursively with REAL_P and IMAG_P.  */
 
 void
-default_write_complex_part (rtx cplx, rtx val, complex_part_t part)
+default_write_complex_part (rtx cplx, rtx val, complex_part_t part, bool undefined_p)
 {
   machine_mode cmode;
   scalar_mode imode;
@@ -1642,8 +1642,8 @@  default_write_complex_part (rtx cplx, rtx val, complex_part_t part)
 
   if (part == BOTH_P)
     {
-      write_complex_part (cplx, read_complex_part (val, REAL_P), REAL_P);
-      write_complex_part (cplx, read_complex_part (val, IMAG_P), IMAG_P);
+      write_complex_part (cplx, read_complex_part (val, REAL_P), REAL_P, false);
+      write_complex_part (cplx, read_complex_part (val, IMAG_P), IMAG_P, false);
       return;
     }
 
@@ -1696,7 +1696,7 @@  default_write_complex_part (rtx cplx, rtx val, complex_part_t part)
     }
 
   store_bit_field (cplx, ibitsize, (part == IMAG_P) ? ibitsize : 0, 0, 0,
-		   imode, val, false);
+		   imode, val, false, undefined_p);
 }
 
 /* By default do not split reductions further.  */
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index cf37eea24b5..f3ae17998de 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -128,7 +128,8 @@  extern rtx default_gen_rtx_complex (machine_mode mode, rtx real_part,
 				    rtx imag_part);
 extern rtx default_read_complex_part (rtx cplx, complex_part_t part);
 extern void default_write_complex_part (rtx cplx, rtx val,
-					complex_part_t part);
+					complex_part_t part,
+					bool undefined_p);
 
 /* OpenACC hooks.  */
 extern bool default_goacc_validate_dims (tree, int [], int, unsigned);
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 047f9237dd4..30e99f812f1 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3812,6 +3812,7 @@  pass_forwprop::execute (function *fun)
 		}
 	      else
 		gsi_next (&gsi);
+	      gsi_next (&gsi);
 	    }
 	  else if (code == CONSTRUCTOR
 		   && VECTOR_TYPE_P (TREE_TYPE (rhs))