[committed] hppa: Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV

Message ID Zb0wvpMcBB5I-dps@mx3210.localdomain
State Accepted
Headers
Series [committed] hppa: Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

John David Anglin Feb. 2, 2024, 6:13 p.m. UTC
  Tested on hppa-unknown-linux-gnu and hppa64-hp-hpux11.11.

This is the first step in fixing PR target/59778.  libatomic/fenv.c
needs fixing for hppa so exceptions are correctly raised.

Committed to trunk.

Dave
---

hppa: Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV

This change implements __builtin_get_fpsr() and __builtin_set_fpsr(x)
to get and set the floating-point status register.  They are used to
implement pa_atomic_assign_expand_fenv().

2024-02-02  John David Anglin  <danglin@gcc.gnu.org>

gcc/ChangeLog:

	PR target/59778
	* config/pa/pa.cc (enum pa_builtins): Add PA_BUILTIN_GET_FPSR
	and PA_BUILTIN_SET_FPSR builtins.
	(pa_builtins_icode): Declare.
	(def_builtin, pa_fpu_init_builtins): New.
	(pa_init_builtins): Initialize FPU builtins.
	(pa_builtin_decl, pa_expand_builtin_1): New.
	(pa_expand_builtin): Handle PA_BUILTIN_GET_FPSR and
	PA_BUILTIN_SET_FPSR builtins.
	(pa_atomic_assign_expand_fenv): New.
	* config/pa/pa.md (UNSPECV_GET_FPSR, UNSPECV_SET_FPSR): New
	UNSPECV constants.
	(get_fpsr, set_fpsr): New expanders.
	(get_fpsr_32, get_fpsr_64, set_fpsr_32, set_fpsr_64): New
	insn patterns.
  

Patch

diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index c58b0a0d75e..694123e37c9 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -28,6 +28,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "gimple.h"
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
@@ -142,6 +143,7 @@  static void pa_asm_out_destructor (rtx, int);
 #endif
 static void pa_init_builtins (void);
 static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
+static tree pa_builtin_decl (unsigned, bool);
 static rtx hppa_builtin_saveregs (void);
 static void hppa_va_start (tree, rtx);
 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
@@ -205,6 +207,7 @@  static bool pa_modes_tieable_p (machine_mode, machine_mode);
 static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
 static HOST_WIDE_INT pa_starting_frame_offset (void);
 static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
+static void pa_atomic_assign_expand_fenv (tree *, tree *, tree *);
 
 /* The following extra sections are only used for SOM.  */
 static GTY(()) section *som_readonly_data_section;
@@ -314,9 +317,10 @@  static size_t n_deferred_plabels = 0;
 
 #undef TARGET_INIT_BUILTINS
 #define TARGET_INIT_BUILTINS pa_init_builtins
-
 #undef TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
+#undef  TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL  pa_builtin_decl
 
 #undef TARGET_REGISTER_MOVE_COST
 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
@@ -426,6 +430,9 @@  static size_t n_deferred_plabels = 0;
 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV pa_atomic_assign_expand_fenv
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Parse the -mfixed-range= option string.  */
@@ -592,6 +599,10 @@  pa_option_override (void)
 
 enum pa_builtins
 {
+  /* FPU builtins.  */
+  PA_BUILTIN_GET_FPSR,
+  PA_BUILTIN_SET_FPSR,
+
   PA_BUILTIN_COPYSIGNQ,
   PA_BUILTIN_FABSQ,
   PA_BUILTIN_INFQ,
@@ -600,10 +611,48 @@  enum pa_builtins
 };
 
 static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
+static GTY(()) enum insn_code pa_builtins_icode[(int) PA_BUILTIN_max];
+
+/* Add a PA builtin function with NAME, ICODE, CODE and TYPE.  Return the
+   function decl or NULL_TREE if the builtin was not added.  */
+
+static tree
+def_builtin (const char *name, enum insn_code icode, enum pa_builtins code,
+	     tree type)
+{
+  tree t
+    = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
+
+  if (t)
+    {
+      pa_builtins[code] = t;
+      pa_builtins_icode[code] = icode;
+    }
+
+  return t;
+}
+
+/* Create builtin functions for FPU instructions.  */
+
+static void
+pa_fpu_init_builtins (void)
+{
+  tree ftype;
+
+  ftype = build_function_type_list (unsigned_type_node, 0);
+  def_builtin ("__builtin_get_fpsr", CODE_FOR_get_fpsr,
+	       PA_BUILTIN_GET_FPSR, ftype);
+  ftype = build_function_type_list (void_type_node, unsigned_type_node, 0);
+  def_builtin ("__builtin_set_fpsr", CODE_FOR_set_fpsr,
+	       PA_BUILTIN_SET_FPSR, ftype);
+}
 
 static void
 pa_init_builtins (void)
 {
+  if (!TARGET_SOFT_FLOAT)
+    pa_fpu_init_builtins ();
+
 #ifdef DONT_HAVE_FPUTC_UNLOCKED
   {
     tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
@@ -663,6 +712,92 @@  pa_init_builtins (void)
     }
 }
 
+/* Implement TARGET_BUILTIN_DECL.  */
+
+static tree
+pa_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+  if (code >= PA_BUILTIN_max)
+    return error_mark_node;
+  return pa_builtins[code];
+}
+
+static rtx
+pa_expand_builtin_1 (tree exp, rtx target,
+		     rtx subtarget ATTRIBUTE_UNUSED,
+		     machine_mode tmode ATTRIBUTE_UNUSED,
+		     int ignore ATTRIBUTE_UNUSED)
+{
+  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+  enum pa_builtins code
+    = (enum pa_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  enum insn_code icode = pa_builtins_icode[code];
+  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+  call_expr_arg_iterator iter;
+  int arg_count = 0;
+  rtx pat, op[4];
+  tree arg;
+
+  if (nonvoid)
+    {
+      machine_mode tmode = insn_data[icode].operand[0].mode;
+      if (!target
+	  || GET_MODE (target) != tmode
+	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+	op[0] = gen_reg_rtx (tmode);
+      else
+	op[0] = target;
+    }
+  else
+    op[0] = NULL_RTX;
+
+  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+    {
+      const struct insn_operand_data *insn_op;
+      int idx;
+
+      if (arg == error_mark_node)
+	return NULL_RTX;
+
+      arg_count++;
+      idx = arg_count - !nonvoid;
+      insn_op = &insn_data[icode].operand[idx];
+      op[arg_count] = expand_normal (arg);
+
+      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
+							insn_op->mode))
+	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
+    }
+
+  switch (arg_count)
+    {
+    case 0:
+      pat = GEN_FCN (icode) (op[0]);
+      break;
+    case 1:
+      if (nonvoid)
+	pat = GEN_FCN (icode) (op[0], op[1]);
+      else
+	pat = GEN_FCN (icode) (op[1]);
+      break;
+    case 2:
+      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+      break;
+    case 3:
+      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (!pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+
+  return (nonvoid ? op[0] : const0_rtx);
+}
+
 static rtx
 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
 		   machine_mode mode ATTRIBUTE_UNUSED,
@@ -673,6 +808,10 @@  pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
 
   switch (fcode)
     {
+    case PA_BUILTIN_GET_FPSR:
+    case PA_BUILTIN_SET_FPSR:
+      return pa_expand_builtin_1 (exp, target, subtarget, mode, ignore);
+
     case PA_BUILTIN_FABSQ:
     case PA_BUILTIN_COPYSIGNQ:
       return expand_call (exp, target, ignore);
@@ -11099,4 +11238,78 @@  pa_function_arg_size (machine_mode mode, const_tree type)
   return (int) CEIL (size, UNITS_PER_WORD);
 }
 
+/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
+
+static void
+pa_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned PA_FE_INEXACT = 1;
+  const unsigned PA_FE_UNDERFLOW = 2;
+  const unsigned PA_FE_OVERFLOW = 4;
+  const unsigned PA_FE_DIVBYZERO = 8;
+  const unsigned PA_FE_INVALID = 16;
+  const unsigned HOST_WIDE_INT PA_FE_ALL_EXCEPT = (PA_FE_INVALID
+						   | PA_FE_DIVBYZERO
+						   | PA_FE_OVERFLOW
+						   | PA_FE_UNDERFLOW
+						   | PA_FE_INEXACT);
+  const unsigned HOST_WIDE_INT PA_FE_EXCEPT_SHIFT = 27;
+  tree fenv_var, get_fpsr, set_fpsr, mask, ld_fenv, masked_fenv;
+  tree hold_all, new_fenv_var, reload_fenv, restore_fnenv;
+  tree get_fpsr_call, set_fpsr_call, update_call, atomic_feraiseexcept;
+
+  if (TARGET_SOFT_FLOAT)
+    return;
+
+  /* Generate the equivalent of :
+       unsigned int fenv_var;
+       fenv_var = __builtin_get_fpsr ();
+
+       unsigned int masked_fenv;
+       masked_fenv = fenv_var & mask;
+
+       __builtin_set_fpsr (masked_fenv);  */
+
+  fenv_var = create_tmp_var_raw (unsigned_type_node);
+  get_fpsr = pa_builtins[PA_BUILTIN_GET_FPSR];
+  set_fpsr = pa_builtins[PA_BUILTIN_SET_FPSR];
+  mask = build_int_cst (unsigned_type_node,
+			~((PA_FE_ALL_EXCEPT << PA_FE_EXCEPT_SHIFT)
+			  | PA_FE_ALL_EXCEPT));
+
+  get_fpsr_call = build_call_expr (get_fpsr, 0);
+  ld_fenv = build4 (TARGET_EXPR, unsigned_type_node,
+		    fenv_var, get_fpsr_call,
+		    NULL_TREE, NULL_TREE);
+  masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
+  hold_all = build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv);
+  set_fpsr_call = build_call_expr (set_fpsr, 1, masked_fenv);
+  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, set_fpsr_call);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __builtin_set_fpsr (masked_fenv);  */
+
+  *clear = set_fpsr_call;
+
+  /* Generate the equivalent of :
+       unsigned int new_fenv_var;
+       new_fenv_var = __builtin_get_fpsr ();
+
+       __builtin_set_fpsr (fenv_var);
+
+       __atomic_feraiseexcept (new_fenv_var);  */
+
+  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
+  reload_fenv = build4 (TARGET_EXPR, unsigned_type_node, new_fenv_var,
+			get_fpsr_call, NULL_TREE, NULL_TREE);
+  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_var);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call = build_call_expr (atomic_feraiseexcept, 1,
+				 fold_convert (integer_type_node,
+					       new_fenv_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv, restore_fnenv), update_call);
+}
+
 #include "gt-pa.h"
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 52ad0c3776d..3bcb8473479 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -96,6 +96,8 @@ 
    UNSPECV_OPC		; outline_prologue_call
    UNSPECV_OEC		; outline_epilogue_call
    UNSPECV_LONGJMP	; builtin_longjmp
+   UNSPECV_GET_FPSR	; get floating-point status register
+   UNSPECV_SET_FPSR	; set floating-point status register
   ])
 
 ;; Maximum pc-relative branch offsets.
@@ -10784,3 +10786,85 @@  add,l %2,%3,%3\;bv,n %%r0(%3)"
   "ldo 15(%%sp),%1\n\t{dep|depw} %%r0,31,3,%1\n\t{ldcw|ldcw,co} 0(%1),%1"
   [(set_attr "type" "binary")
    (set_attr "length" "12")])
+
+;; Get floating-point status register.
+
+(define_expand "get_fpsr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
+  ""
+{
+  if (TARGET_SOFT_FLOAT)
+    FAIL;
+
+  if (TARGET_64BIT)
+    emit_insn (gen_get_fpsr_64 (operands[0]));
+  else
+    emit_insn (gen_get_fpsr_32 (operands[0]));
+  DONE;
+})
+
+;; The floating-point status register is stored to an unused slot in
+;; the frame marker and then loaded to register operand 0.  The final
+;; floating-point load restores the T bit in the status register.
+
+;; The final load might be avoided if a word mode store was used to
+;; store the status register.  It is unclear why we need a double-word
+;; store.  I suspect PA 1.0 didn't support single-word stores of the
+;; status register.
+
+(define_insn "get_fpsr_32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
+  "!TARGET_SOFT_FLOAT && !TARGET_64BIT"
+  "{fstds|fstd} %%fr0,-16(%%sp)\n\tldw -16(%%sp),%0\n\t{fldds|fldd} -16(%%sp),%%fr0"
+  [(set_attr "type" "fpstore_load")
+   (set_attr "length" "12")])
+
+;; The 64-bit pattern is similar to the 32-bit pattern except we need
+;; to compute the address of the frame location as long displacements
+;; aren't supported on Linux targets.
+
+(define_insn "get_fpsr_64"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))
+   (clobber (match_scratch:DI 1 "=&r"))]
+  "!TARGET_SOFT_FLOAT && TARGET_64BIT"
+  "ldo -40(%%sp),%1\n\tfstd %%fr0,0(%1)\n\tldw 0(%1),%0\n\tfldd 0(%1),%%fr0"
+  [(set_attr "type" "fpstore_load")
+   (set_attr "length" "16")])
+
+;; Set floating-point status register.
+
+(define_expand "set_fpsr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
+  ""
+{
+  if (TARGET_SOFT_FLOAT)
+    FAIL;
+
+  if (TARGET_64BIT)
+    emit_insn (gen_set_fpsr_64 (operands[0]));
+  else
+    emit_insn (gen_set_fpsr_32 (operands[0]));
+  DONE;
+})
+
+;; The old T bit is extracted and stored in the new status register.
+
+(define_insn "set_fpsr_32"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)
+   (clobber (match_scratch:SI 1 "=&r"))]
+  "!TARGET_SOFT_FLOAT && !TARGET_64BIT"
+  "{fstds|fstd} %%fr0,-16(%%sp)\n\tldw -16(%%sp),%1\n\t{extru|extrw,u} %1,25,1,%1\n\t{dep|depw} %1,25,1,%0\n\tstw %0,-16(%%sp)\n\t{fldds|fldd} -16(%%sp),%%fr0"
+  [(set_attr "type" "store_fpload")
+   (set_attr "length" "24")])
+
+(define_insn "set_fpsr_64"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)
+   (clobber (match_scratch:DI 1 "=&r"))
+   (clobber (match_scratch:SI 2 "=&r"))]
+  "!TARGET_SOFT_FLOAT && TARGET_64BIT"
+  "ldo -40(%%sp),%1\n\tfstd %%fr0,0(%1)\n\tldw 0(%1),%2\n\textrw,u %2,25,1,%2\n\tdepw %2,25,1,%0\n\tstw %0,0(%1)\n\tfldd 0(%1),%%fr0"
+  [(set_attr "type" "store_fpload")
+   (set_attr "length" "28")])