c-family: Add _BitInt support for __atomic_*fetch* [PR102989]

Message ID ZMuE0AUpDPcENgeB@tucnak

Commit Message

Jakub Jelinek Aug. 3, 2023, 10:43 a.m. UTC
  Hi!

The following patch implements the lowering of the __atomic_*fetch* built-in
functions whose first argument is a pointer to an (optionally _Atomic) _BitInt
that either doesn't have a size of 1, 2, 4, 8 or 16 bytes, or has a 16-byte
size but the target doesn't support TImode.
The patch applies on top of the _BitInt patch series.
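
Conceptually, each such __atomic_*fetch* call is lowered to a compare-and-swap
loop built from the generic __atomic_load and __atomic_compare_exchange
built-ins.  A minimal source-level sketch of what e.g. __atomic_fetch_add on
an _Atomic _BitInt(575) roughly expands to (the function and variable names
are illustrative only, not part of the patch):

_BitInt(575)
fetch_add_sketch (_Atomic _BitInt(575) *p, _BitInt(575) val)
{
  _BitInt(575) old, newval;
  /* Load the current value; the compare-exchange below provides ordering.  */
  __atomic_load (p, &old, __ATOMIC_RELAXED);
  do
    /* Do the arithmetic in the unsigned type so that wrap-around is well
       defined, as the patch does for PLUS_EXPR/MINUS_EXPR.  */
    newval = (_BitInt(575)) ((unsigned _BitInt(575)) old
			     + (unsigned _BitInt(575)) val);
  while (!__atomic_compare_exchange (p, &old, &newval, 0,
				     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
  return old;	/* The *_fetch variants return newval instead.  */
}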

Tested on x86_64-linux.

2023-08-03  Jakub Jelinek  <jakub@redhat.com>

	PR c/102989
gcc/c-family/
	* c-common.cc (sync_resolve_size): Add ORIG_FORMAT argument.  If
	FETCH && !ORIG_FORMAT and type is BITINT_TYPE, return -1 if size
	isn't one of 1, 2, 4, 8 or 16, or if it is 16 but TImode is not
	supported.
	(atomic_bitint_fetch_using_cas_loop): New function.
	(resolve_overloaded_builtin): Adjust sync_resolve_size caller.  If
	-1 is returned, use atomic_bitint_fetch_using_cas_loop to lower it.
	Formatting fix.
gcc/testsuite/
	* gcc.dg/bitint-18.c: New test.


	Jakub
  

Comments

Joseph Myers Aug. 3, 2023, 5:19 p.m. UTC | #1
On Thu, 3 Aug 2023, Jakub Jelinek via Gcc-patches wrote:

> --- gcc/testsuite/gcc.dg/bitint-18.c.jj	2023-08-03 12:26:35.510922996 +0200
> +++ gcc/testsuite/gcc.dg/bitint-18.c	2023-08-03 12:26:42.114831050 +0200
> @@ -0,0 +1,44 @@
> +/* PR c/102989 */
> +/* { dg-do compile { target bitint } } */

It would be good to have execution tests for these operations (so probably 
in gcc.dg/atomic so that libatomic is linked in automatically as needed).
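
For illustration, a minimal run-time test along those lines might look like
the following (placed under gcc.dg/atomic so that libatomic is linked in as
needed; the exact directives and values below are only a sketch, not part of
the patch):

/* { dg-do run { target bitint } } */
/* { dg-options "-std=c2x" } */

_Atomic _BitInt(135) v = 10wb;

int
main ()
{
  _BitInt(135) r = __atomic_fetch_add (&v, 5wb, __ATOMIC_SEQ_CST);
  if (r != 10wb || v != 15wb)
    __builtin_abort ();
  r = __atomic_sub_fetch (&v, 7wb, __ATOMIC_SEQ_CST);
  if (r != 8wb || v != 8wb)
    __builtin_abort ();
  return 0;
}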
  

Patch

--- gcc/c-family/c-common.cc.jj	2023-07-11 15:28:55.119673958 +0200
+++ gcc/c-family/c-common.cc	2023-08-03 12:10:50.852085519 +0200
@@ -7190,12 +7190,16 @@  speculation_safe_value_resolve_return (t
 /* A helper function for resolve_overloaded_builtin in resolving the
    overloaded __sync_ builtins.  Returns a positive power of 2 if the
    first operand of PARAMS is a pointer to a supported data type.
-   Returns 0 if an error is encountered.
+   Returns 0 if an error is encountered.  Return -1 for _BitInt
+   __atomic*fetch* with unsupported type which should be handled by
+   a cas loop.
    FETCH is true when FUNCTION is one of the _FETCH_OP_ or _OP_FETCH_
+   built-ins.  ORIG_FORMAT is for __sync_* rather than __atomic_*
    built-ins.  */
 
 static int
-sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch)
+sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch,
+		   bool orig_format)
 {
   /* Type of the argument.  */
   tree argtype;
@@ -7230,9 +7234,19 @@  sync_resolve_size (tree function, vec<tr
     goto incompatible;
 
   size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
+  if (size == 16
+      && fetch
+      && !orig_format
+      && TREE_CODE (type) == BITINT_TYPE
+      && !targetm.scalar_mode_supported_p (TImode))
+    return -1;
+
   if (size == 1 || size == 2 || size == 4 || size == 8 || size == 16)
     return size;
 
+  if (fetch && !orig_format && TREE_CODE (type) == BITINT_TYPE)
+    return -1;
+
  incompatible:
   /* Issue the diagnostic only if the argument is valid, otherwise
      it would be redundant at best and could be misleading.  */
@@ -7849,6 +7863,223 @@  resolve_overloaded_atomic_store (locatio
 }
 
 
+/* Emit __atomic*fetch* on _BitInt which doesn't have a size of
+   1, 2, 4, 8 or 16 bytes using __atomic_compare_exchange loop.
+   ORIG_CODE is the DECL_FUNCTION_CODE of ORIG_FUNCTION and
+   ORIG_PARAMS arguments of the call.  */
+
+static tree
+atomic_bitint_fetch_using_cas_loop (location_t loc,
+				    enum built_in_function orig_code,
+				    tree orig_function,
+				    vec<tree, va_gc> *orig_params)
+{
+  enum tree_code code = ERROR_MARK;
+  bool return_old_p = false;
+  switch (orig_code)
+    {
+    case BUILT_IN_ATOMIC_ADD_FETCH_N:
+      code = PLUS_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_SUB_FETCH_N:
+      code = MINUS_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_AND_FETCH_N:
+      code = BIT_AND_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_NAND_FETCH_N:
+      break;
+    case BUILT_IN_ATOMIC_XOR_FETCH_N:
+      code = BIT_XOR_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_OR_FETCH_N:
+      code = BIT_IOR_EXPR;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_ADD_N:
+      code = PLUS_EXPR;
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_SUB_N:
+      code = MINUS_EXPR;
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_AND_N:
+      code = BIT_AND_EXPR;
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_NAND_N:
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_XOR_N:
+      code = BIT_XOR_EXPR;
+      return_old_p = true;
+      break;
+    case BUILT_IN_ATOMIC_FETCH_OR_N:
+      code = BIT_IOR_EXPR;
+      return_old_p = true;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (orig_params->length () != 3)
+    {
+      if (orig_params->length () < 3)
+	error_at (loc, "too few arguments to function %qE", orig_function);
+      else
+	error_at (loc, "too many arguments to function %qE", orig_function);
+      return error_mark_node;
+    }
+
+  tree stmts = push_stmt_list ();
+
+  tree nonatomic_lhs_type = TREE_TYPE (TREE_TYPE ((*orig_params)[0]));
+  nonatomic_lhs_type = TYPE_MAIN_VARIANT (nonatomic_lhs_type);
+  gcc_assert (TREE_CODE (nonatomic_lhs_type) == BITINT_TYPE);
+
+  tree lhs_addr = (*orig_params)[0];
+  tree val = convert (nonatomic_lhs_type, (*orig_params)[1]);
+  tree model = convert (integer_type_node, (*orig_params)[2]);
+  if (TREE_SIDE_EFFECTS (lhs_addr))
+    {
+      tree var = create_tmp_var_raw (TREE_TYPE (lhs_addr));
+      lhs_addr = build4 (TARGET_EXPR, TREE_TYPE (lhs_addr), var, lhs_addr,
+			 NULL_TREE, NULL_TREE);
+      add_stmt (lhs_addr);
+    }
+  if (TREE_SIDE_EFFECTS (val))
+    {
+      tree var = create_tmp_var_raw (nonatomic_lhs_type);
+      val = build4 (TARGET_EXPR, nonatomic_lhs_type, var, val, NULL_TREE,
+		    NULL_TREE);
+      add_stmt (val);
+    }
+  if (TREE_SIDE_EFFECTS (model))
+    {
+      tree var = create_tmp_var_raw (integer_type_node);
+      model = build4 (TARGET_EXPR, integer_type_node, var, model, NULL_TREE,
+		      NULL_TREE);
+      add_stmt (model);
+    }
+
+  tree old = create_tmp_var_raw (nonatomic_lhs_type);
+  tree old_addr = build_unary_op (loc, ADDR_EXPR, old, false);
+  TREE_ADDRESSABLE (old) = 1;
+  suppress_warning (old);
+
+  tree newval = create_tmp_var_raw (nonatomic_lhs_type);
+  tree newval_addr = build_unary_op (loc, ADDR_EXPR, newval, false);
+  TREE_ADDRESSABLE (newval) = 1;
+  suppress_warning (newval);
+
+  tree loop_decl = create_artificial_label (loc);
+  tree loop_label = build1 (LABEL_EXPR, void_type_node, loop_decl);
+
+  tree done_decl = create_artificial_label (loc);
+  tree done_label = build1 (LABEL_EXPR, void_type_node, done_decl);
+
+  vec<tree, va_gc> *params;
+  vec_alloc (params, 6);
+
+  /* __atomic_load (addr, &old, RELAXED).  */
+  tree fndecl = builtin_decl_explicit (BUILT_IN_ATOMIC_LOAD);
+  params->quick_push (lhs_addr);
+  params->quick_push (old_addr);
+  params->quick_push (build_int_cst (integer_type_node, MEMMODEL_RELAXED));
+  tree func_call = resolve_overloaded_builtin (loc, fndecl, params);
+  if (func_call == NULL_TREE)
+    func_call = build_function_call_vec (loc, vNULL, fndecl, params, NULL);
+  old = build4 (TARGET_EXPR, nonatomic_lhs_type, old, func_call, NULL_TREE,
+		NULL_TREE);
+  add_stmt (old);
+  params->truncate (0);
+
+  /* loop:  */
+  add_stmt (loop_label);
+
+  /* newval = old + val;  */
+  tree rhs;
+  switch (code)
+    {
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+      if (!TYPE_OVERFLOW_WRAPS (nonatomic_lhs_type))
+	{
+	  tree utype
+	    = build_bitint_type (TYPE_PRECISION (nonatomic_lhs_type), 1);
+	  rhs = convert (nonatomic_lhs_type,
+			 build2_loc (loc, code, utype,
+				     convert (utype, old),
+				     convert (utype, val)));
+	}
+      else
+	rhs = build2_loc (loc, code, nonatomic_lhs_type, old, val);
+      break;
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      rhs = build2_loc (loc, code, nonatomic_lhs_type, old, val);
+      break;
+    case ERROR_MARK:
+      rhs = build2_loc (loc, BIT_AND_EXPR, nonatomic_lhs_type,
+			build1_loc (loc, BIT_NOT_EXPR,
+				    nonatomic_lhs_type, old), val);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  rhs = build4 (TARGET_EXPR, nonatomic_lhs_type, newval, rhs, NULL_TREE,
+		NULL_TREE);
+  SET_EXPR_LOCATION (rhs, loc);
+  add_stmt (rhs);
+
+  /* if (__atomic_compare_exchange (addr, &old, &new, false, model, model))
+       goto done;  */
+  fndecl = builtin_decl_explicit (BUILT_IN_ATOMIC_COMPARE_EXCHANGE);
+  params->quick_push (lhs_addr);
+  params->quick_push (old_addr);
+  params->quick_push (newval_addr);
+  params->quick_push (integer_zero_node);
+  params->quick_push (model);
+  if (tree_fits_uhwi_p (model)
+      && (tree_to_uhwi (model) == MEMMODEL_RELEASE
+	  || tree_to_uhwi (model) == MEMMODEL_ACQ_REL))
+    params->quick_push (build_int_cst (integer_type_node, MEMMODEL_RELAXED));
+  else
+    params->quick_push (model);
+  func_call = resolve_overloaded_builtin (loc, fndecl, params);
+  if (func_call == NULL_TREE)
+    func_call = build_function_call_vec (loc, vNULL, fndecl, params, NULL);
+
+  tree goto_stmt = build1 (GOTO_EXPR, void_type_node, done_decl);
+  SET_EXPR_LOCATION (goto_stmt, loc);
+
+  tree stmt
+    = build3 (COND_EXPR, void_type_node, func_call, goto_stmt, NULL_TREE);
+  SET_EXPR_LOCATION (stmt, loc);
+  add_stmt (stmt);
+
+  /* goto loop;  */
+  goto_stmt = build1 (GOTO_EXPR, void_type_node, loop_decl);
+  SET_EXPR_LOCATION (goto_stmt, loc);
+  add_stmt (goto_stmt);
+
+  /* done:  */
+  add_stmt (done_label);
+
+  tree ret = create_tmp_var_raw (nonatomic_lhs_type);
+  stmt = build2_loc (loc, MODIFY_EXPR, void_type_node, ret,
+		     return_old_p ? old : newval);
+  add_stmt (stmt);
+
+  /* Finish the compound statement.  */
+  stmts = pop_stmt_list (stmts);
+
+  return build4 (TARGET_EXPR, nonatomic_lhs_type, ret, stmts, NULL_TREE,
+		 NULL_TREE);
+}
+
+
 /* Some builtin functions are placeholders for other expressions.  This
    function should be called immediately after parsing the call expression
    before surrounding code has committed to the type of the expression.
@@ -8030,19 +8261,22 @@  resolve_overloaded_builtin (location_t l
 	/* The following are not _FETCH_OPs and must be accepted with
 	   pointers to _Bool (or C++ bool).  */
 	if (fetch_op)
-	  fetch_op =
-	    (orig_code != BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
-	     && orig_code != BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
-	     && orig_code != BUILT_IN_SYNC_LOCK_TEST_AND_SET_N
-	     && orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N);
+	  fetch_op = (orig_code != BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
+		      && orig_code != BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
+		      && orig_code != BUILT_IN_SYNC_LOCK_TEST_AND_SET_N
+		      && orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N);
 
-	int n = sync_resolve_size (function, params, fetch_op);
+	int n = sync_resolve_size (function, params, fetch_op, orig_format);
 	tree new_function, first_param, result;
 	enum built_in_function fncode;
 
 	if (n == 0)
 	  return error_mark_node;
 
+	if (n == -1)
+	  return atomic_bitint_fetch_using_cas_loop (loc, orig_code,
+						     function, params);
+
 	fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
 	new_function = builtin_decl_explicit (fncode);
 	if (!sync_resolve_params (loc, function, new_function, params,
--- gcc/testsuite/gcc.dg/bitint-18.c.jj	2023-08-03 12:26:35.510922996 +0200
+++ gcc/testsuite/gcc.dg/bitint-18.c	2023-08-03 12:26:42.114831050 +0200
@@ -0,0 +1,44 @@ 
+/* PR c/102989 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+
+_Atomic _BitInt(15) a;
+_Atomic(_BitInt(15)) b;
+_Atomic _BitInt(115) c;
+_Atomic _BitInt(192) d;
+_Atomic _BitInt(575) e;
+unsigned _BitInt(575) f;
+
+__attribute__((noipa)) _BitInt(575)
+foo (_BitInt(575) x)
+{
+  return x;
+}
+
+__attribute__((noipa)) int
+bar (int x)
+{
+  return x;
+}
+
+__attribute__((noipa)) _Atomic _BitInt(575) *
+baz (_Atomic _BitInt(575) *x)
+{
+  return x;
+}
+
+int
+main ()
+{
+  a += 1wb;
+  b -= 2wb;
+  c *= 3wb;
+  d /= 4wb;
+  e -= 5wb;
+  f = __atomic_fetch_add (&e, 54342985743985743985743895743834298574985734895743895734895wb, __ATOMIC_SEQ_CST);
+  f += __atomic_sub_fetch (&e, 13110356772307144130089534440127211568864891923061809853784155727841516341877716905506658630804426134644404380556711020290072702485839594283061059349912463486203837251238365wb, __ATOMIC_SEQ_CST);
+  f += __atomic_fetch_and (&e, -33740418462630594385361724744395454079240140931656245750192534103967695265126850678980088699287669565365078793986191778469857714756111026776864987769580622009237241167211461wb, __ATOMIC_RELAXED);
+  f += __atomic_xor_fetch (&e, 30799001892772360282132495459823194445423296347702377756575214695893559890977912003055702776548378201752339680602420936304294728688029412276600086349055079523071860836114234wb, __ATOMIC_SEQ_CST);
+  f += __atomic_fetch_or (baz (&e), foo (-6581969867283727911005990155704642154324773504588160884865628865547696324844988049982401783508268917375066790729408659617189350524019843499435572226770089390885472550659255wb), bar (__ATOMIC_RELAXED));
+  f += __atomic_nand_fetch (&e, 55047840194947228224723671648125013926111290688378416557548660662319034233151051252215595447712248992759177463741832904590457754423713378627482465906620631734790561114905369wb, __ATOMIC_ACQ_REL);
+}