@@ -133,9 +133,6 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_GAS
#define TARGET_GAS 1
-#undef TARGET_SYNC_LIBCALL
-#define TARGET_SYNC_LIBCALL 1
-
/* The SYNC operations are implemented as library functions, not
INSN patterns. As a result, the HAVE defines for the patterns are
not defined. We need to define them to generate the corresponding
@@ -5940,8 +5940,8 @@ pa_init_libfuncs (void)
"_U_Qfcnvxf_udbl_to_quad");
}
- if (TARGET_SYNC_LIBCALL)
- init_sync_libfuncs (8);
+ if (TARGET_SYNC_LIBCALLS)
+ init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
}
/* HP's millicode routines mean something special to the assembler.
@@ -72,10 +72,12 @@ extern unsigned long total_code_bytes;
#define HPUX_LONG_DOUBLE_LIBRARY 0
#endif
-/* Linux kernel atomic operation support. */
-#ifndef TARGET_SYNC_LIBCALL
-#define TARGET_SYNC_LIBCALL 0
-#endif
+/* Sync libcall support. */
+#define TARGET_SYNC_LIBCALLS (flag_sync_libcalls)
+
+/* The maximum size of the sync library functions supported. DImode
+ is supported on 32-bit targets using floating point loads and stores. */
+#define MAX_SYNC_LIBFUNC_SIZE 8
/* The following three defines are potential target switches. The current
defines are optimal given the current capabilities of GAS and GNU ld. */
@@ -173,6 +175,8 @@ do { \
builtin_define("_PA_RISC1_0"); \
if (HPUX_LONG_DOUBLE_LIBRARY) \
builtin_define("__SIZEOF_FLOAT128__=16"); \
+ if (TARGET_SOFT_FLOAT) \
+ builtin_define("__SOFTFP__"); \
} while (0)
/* An old set of OS defines for various BSD-like systems. */
@@ -10360,7 +10360,23 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
;; doubleword loads and stores are not guaranteed to be atomic
;; when referencing the I/O address space.
-;; These patterns are at the bottom so the non atomic versions are preferred.
+;; Atomic and sync libcalls use different lock sets. Great care is
+;; needed if both are used in a single application.
+
+;; Atomic load and store libcalls are enabled by the -matomic-libcalls
+;; option.  Although the option is enabled by default, atomic libcalls
+;; are only generated when sync libcalls are disabled since the
+;; generated libcalls depend on libatomic, which is not built until
+;; the end of the gcc build.  For loads, we only need an atomic
+;; libcall for DImode.
+
+;; Sync libcalls are enabled by default when supported. They can be
+;; disabled by the -fno-sync-libcalls option. Sync libcalls always
+;; use a single memory store in their implementation, even for DImode.
+;; DImode stores are done using either std or fstd. Thus, we only
+;; need a sync load libcall for DImode when we don't have an atomic
+;; processor load available for the mode (TARGET_SOFT_FLOAT).
+
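+;; For example, a DImode atomic store on a 32-bit target, such as the
+;; (illustrative) assignment
+;;
+;;   _Atomic long long v;
+;;   void set_v (long long x) { v = x; }
+;;
+;; expands to a call to __sync_lock_test_and_set_8 with the default
+;; options and to a call to __atomic_exchange_8 with -fno-sync-libcalls.
+;; The two calls take different locks, hence the care needed when both
+;; are used in a single application.
+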
+;; Implement atomic QImode store using exchange.
(define_expand "atomic_storeqi"
[(match_operand:QI 0 "memory_operand") ;; memory
@@ -10368,19 +10384,30 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
(match_operand:SI 2 "const_int_operand")] ;; model
""
{
- if (TARGET_SYNC_LIBCALL)
+ rtx addr, libfunc;
+
+ if (TARGET_SYNC_LIBCALLS)
{
- rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, QImode);
- rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ libfunc = optab_libfunc (sync_lock_test_and_set_optab, QImode);
+ emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
+ operands[1], QImode);
+ DONE;
+ }
+ if (TARGET_ATOMIC_LIBCALLS)
+ {
+ addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ libfunc = init_one_libfunc ("__atomic_exchange_1");
emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
operands[1], QImode);
DONE;
}
+
FAIL;
})
-;; Implement atomic HImode stores using exchange.
+;; Implement atomic HImode store using exchange.
(define_expand "atomic_storehi"
[(match_operand:HI 0 "memory_operand") ;; memory
@@ -10388,15 +10415,26 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
(match_operand:SI 2 "const_int_operand")] ;; model
""
{
- if (TARGET_SYNC_LIBCALL)
+ rtx addr, libfunc;
+
+ if (TARGET_SYNC_LIBCALLS)
{
- rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, HImode);
- rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ libfunc = optab_libfunc (sync_lock_test_and_set_optab, HImode);
+ emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
+ operands[1], HImode);
+ DONE;
+ }
+ if (TARGET_ATOMIC_LIBCALLS)
+ {
+ addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ libfunc = init_one_libfunc ("__atomic_exchange_2");
emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
operands[1], HImode);
DONE;
}
+
FAIL;
})
@@ -10408,20 +10446,39 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
(match_operand:SI 2 "const_int_operand")] ;; model
""
{
- if (TARGET_SYNC_LIBCALL)
+ rtx addr, libfunc;
+
+ if (TARGET_SYNC_LIBCALLS)
{
- rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, SImode);
- rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ libfunc = optab_libfunc (sync_lock_test_and_set_optab, SImode);
+ emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
+ operands[1], SImode);
+ DONE;
+ }
+ if (TARGET_ATOMIC_LIBCALLS)
+ {
+ addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ libfunc = init_one_libfunc ("__atomic_exchange_4");
emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
operands[1], SImode);
DONE;
}
+
FAIL;
})
;; Implement atomic DImode load.
+;; We need an atomic or sync libcall whenever the processor load or
+;; store used for DImode is not atomic.  The 32-bit libatomic
+;; implementation uses a pair of stw instructions.  They are not
+;; atomic, so we need to call __atomic_load_8.  The linux libgcc
+;; sync implementation uses a single std or fstd instruction, which
+;; is atomic, so we only need to call __sync_lock_load_8 when the
+;; load operation would not be atomic (e.g., 32-bit TARGET_SOFT_FLOAT).
+
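+;; For example, with -msoft-float a 32-bit DImode __atomic_load_n call
+;; expands to a call to __sync_lock_load_8 by default and to a call to
+;; __atomic_load_8 with -fno-sync-libcalls.  With hardware floating
+;; point, it expands to a processor load with the necessary barriers.
+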
(define_expand "atomic_loaddi"
[(match_operand:DI 0 "register_operand") ;; val out
(match_operand:DI 1 "memory_operand") ;; memory
@@ -10429,12 +10486,35 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
""
{
enum memmodel model;
+ rtx addr, libfunc;
- if (TARGET_64BIT || TARGET_SOFT_FLOAT)
+ if (TARGET_64BIT)
FAIL;
+ if (TARGET_SYNC_LIBCALLS && MAX_SYNC_LIBFUNC_SIZE >= 8 && TARGET_SOFT_FLOAT)
+ {
+ addr = convert_memory_address (Pmode, XEXP (operands[1], 0));
+      libfunc = init_one_libfunc ("__sync_lock_load_8");
+ emit_library_call_value (libfunc, operands[0], LCT_NORMAL, DImode,
+ addr, Pmode);
+ DONE;
+ }
+
+ if (TARGET_ATOMIC_LIBCALLS && TARGET_SOFT_FLOAT)
+ {
+ addr = convert_memory_address (Pmode, XEXP (operands[1], 0));
+ libfunc = init_one_libfunc ("__atomic_load_8");
+ emit_library_call_value (libfunc, operands[0], LCT_NORMAL, DImode,
+ addr, Pmode);
+ DONE;
+ }
+
+ if (TARGET_SOFT_FLOAT)
+ FAIL;
+
+  /* Fall back to processor load with barriers. */
model = memmodel_from_int (INTVAL (operands[2]));
- operands[1] = force_reg (SImode, XEXP (operands[1], 0));
+ operands[1] = force_reg (Pmode, XEXP (operands[1], 0));
if (is_mm_seq_cst (model))
expand_mem_thread_fence (model);
emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
@@ -10460,12 +10540,21 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
""
{
enum memmodel model;
+ rtx addr, libfunc;
- if (TARGET_SYNC_LIBCALL)
+ if (TARGET_SYNC_LIBCALLS && MAX_SYNC_LIBFUNC_SIZE >= 8)
{
- rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, DImode);
- rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ libfunc = optab_libfunc (sync_lock_test_and_set_optab, DImode);
+ emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
+ operands[1], DImode);
+ DONE;
+ }
+ if (TARGET_ATOMIC_LIBCALLS)
+ {
+ addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+ libfunc = init_one_libfunc ("__atomic_exchange_8");
emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
operands[1], DImode);
DONE;
@@ -10474,8 +10563,9 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
if (TARGET_64BIT || TARGET_SOFT_FLOAT)
FAIL;
+  /* Fall back to processor store with barriers. */
model = memmodel_from_int (INTVAL (operands[2]));
- operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+ operands[0] = force_reg (Pmode, XEXP (operands[0], 0));
if (operands[1] != CONST0_RTX (DImode))
operands[1] = force_reg (DImode, operands[1]);
expand_mem_thread_fence (model);
@@ -37,6 +37,10 @@ march=2.0
Target RejectNegative
Generate PA2.0 code (requires binutils 2.10 or later).
+matomic-libcalls
+Target Var(TARGET_ATOMIC_LIBCALLS) Init(1)
+Generate libcalls for atomic loads and stores when sync libcalls are disabled.
+
mbig-switch
Target Ignore
Does nothing. Preserved for backward compatibility.
@@ -975,17 +975,18 @@ Objective-C and Objective-C++ Dialects}.
@emph{HPPA Options}
@gccoptlist{-march=@var{architecture-type} @gol
+-matomic-libcalls -mbig-switch @gol
-mcaller-copies -mdisable-fpregs -mdisable-indexing @gol
--mfast-indirect-calls -mgas -mgnu-ld -mhp-ld @gol
+-mordered -mfast-indirect-calls -mgas -mgnu-ld -mhp-ld @gol
-mfixed-range=@var{register-range} @gol
--mjump-in-delay -mlinker-opt -mlong-calls @gol
--mlong-load-store -mno-disable-fpregs @gol
+-mcoherent-ldcw -mjump-in-delay -mlinker-opt -mlong-calls @gol
+-mlong-load-store -mno-atomic-libcalls -mno-disable-fpregs @gol
-mno-disable-indexing -mno-fast-indirect-calls -mno-gas @gol
-mno-jump-in-delay -mno-long-load-store @gol
-mno-portable-runtime -mno-soft-float @gol
-mno-space-regs -msoft-float -mpa-risc-1-0 @gol
-mpa-risc-1-1 -mpa-risc-2-0 -mportable-runtime @gol
--mschedule=@var{cpu-type} -mspace-regs -msio -mwsio @gol
+-mschedule=@var{cpu-type} -mspace-regs -msoft-mult -msio -mwsio @gol
-munix=@var{unix-std} -nolibdld -static -threads}
@emph{IA-64 Options}
@@ -24891,6 +24892,33 @@ other way around.
@opindex mpa-risc-2-0
Synonyms for @option{-march=1.0}, @option{-march=1.1}, and @option{-march=2.0} respectively.
+@item -matomic-libcalls
+@opindex matomic-libcalls
+@opindex mno-atomic-libcalls
+Generate libcalls for atomic loads and stores when sync libcalls are disabled.
+This option is enabled by default. It only affects the generation of
+atomic libcalls by the HPPA backend.
+
+Both the sync and @file{libatomic} libcall implementations use locking.
+As a result, processor stores are not atomic with respect to other
+atomic operations. Processor loads up to DImode are atomic with
+respect to other atomic operations provided they are implemented as
+a single access.
+
+The PA-RISC architecture does not support any atomic operations in
+hardware except for the @code{ldcw} instruction. Thus, all atomic
+support is implemented using sync and atomic libcalls. Sync libcall
+support is in @file{libgcc.a}. Atomic libcall support is in
+@file{libatomic}.
+
+This option generates @code{__atomic_exchange} calls for atomic stores.
+It also provides special handling for atomic DImode accesses on 32-bit
+targets.
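+
+For example, consider a DImode atomic store in 32-bit code (an
+illustrative sketch; the variable and function names are arbitrary):
+
+@smallexample
+_Atomic long long counter;
+
+void
+set_counter (long long v)
+@{
+  counter = v;
+@}
+@end smallexample
+
+@noindent
+This store calls @code{__sync_lock_test_and_set_8} with the default
+options and @code{__atomic_exchange_8} when sync libcalls are disabled
+with @option{-fno-sync-libcalls}.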
+
+@item -mbig-switch
+@opindex mbig-switch
+Does nothing. Preserved for backward compatibility.
+
@item -mcaller-copies
@opindex mcaller-copies
The caller copies function arguments passed by hidden reference. This
@@ -24899,30 +24927,19 @@ option should be used with care as it is not compatible with the default
passed by hidden reference and the option provides better compatibility
with OpenMP.
-@item -mjump-in-delay
-@opindex mjump-in-delay
-This option is ignored and provided for compatibility purposes only.
+@item -mcoherent-ldcw
+@opindex mcoherent-ldcw
+Use ldcw/ldcd coherent cache-control hint.
@item -mdisable-fpregs
@opindex mdisable-fpregs
-Prevent floating-point registers from being used in any manner. This is
-necessary for compiling kernels that perform lazy context switching of
-floating-point registers. If you use this option and attempt to perform
-floating-point operations, the compiler aborts.
+Disable floating-point registers. Equivalent to @code{-msoft-float}.
@item -mdisable-indexing
@opindex mdisable-indexing
Prevent the compiler from using indexing address modes. This avoids some
rather obscure problems when compiling MIG generated code under MACH@.
-@item -mno-space-regs
-@opindex mno-space-regs
-@opindex mspace-regs
-Generate code that assumes the target has no space registers. This allows
-GCC to generate faster indirect calls and use unscaled index address modes.
-
-Such code is suitable for level 0 PA systems and kernels.
-
@item -mfast-indirect-calls
@opindex mfast-indirect-calls
Generate code that assumes calls never cross space boundaries. This
@@ -24939,57 +24956,10 @@ useful when compiling kernel code. A register range is specified as
two registers separated by a dash. Multiple register ranges can be
specified separated by a comma.
-@item -mlong-load-store
-@opindex mlong-load-store
-Generate 3-instruction load and store sequences as sometimes required by
-the HP-UX 10 linker. This is equivalent to the @samp{+k} option to
-the HP compilers.
-
-@item -mportable-runtime
-@opindex mportable-runtime
-Use the portable calling conventions proposed by HP for ELF systems.
-
@item -mgas
@opindex mgas
Enable the use of assembler directives only GAS understands.
-@item -mschedule=@var{cpu-type}
-@opindex mschedule
-Schedule code according to the constraints for the machine type
-@var{cpu-type}. The choices for @var{cpu-type} are @samp{700}
-@samp{7100}, @samp{7100LC}, @samp{7200}, @samp{7300} and @samp{8000}. Refer
-to @file{/usr/lib/sched.models} on an HP-UX system to determine the
-proper scheduling option for your machine. The default scheduling is
-@samp{8000}.
-
-@item -mlinker-opt
-@opindex mlinker-opt
-Enable the optimization pass in the HP-UX linker. Note this makes symbolic
-debugging impossible. It also triggers a bug in the HP-UX 8 and HP-UX 9
-linkers in which they give bogus error messages when linking some programs.
-
-@item -msoft-float
-@opindex msoft-float
-Generate output containing library calls for floating point.
-@strong{Warning:} the requisite libraries are not available for all HPPA
-targets. Normally the facilities of the machine's usual C compiler are
-used, but this cannot be done directly in cross-compilation. You must make
-your own arrangements to provide suitable library functions for
-cross-compilation.
-
-@option{-msoft-float} changes the calling convention in the output file;
-therefore, it is only useful if you compile @emph{all} of a program with
-this option. In particular, you need to compile @file{libgcc.a}, the
-library that comes with GCC, with @option{-msoft-float} in order for
-this to work.
-
-@item -msio
-@opindex msio
-Generate the predefine, @code{_SIO}, for server IO@. The default is
-@option{-mwsio}. This generates the predefines, @code{__hp9000s700},
-@code{__hp9000s700__} and @code{_WSIO}, for workstation IO@. These
-options are available under HP-UX and HI-UX@.
-
@item -mgnu-ld
@opindex mgnu-ld
Use options specific to GNU @command{ld}.
@@ -25019,6 +24989,12 @@ configure option, GCC's program search path, and finally by the user's
`gcc -print-prog-name=ld`}. This option is only available on the 64-bit
HP-UX GCC, i.e.@: configured with @samp{hppa*64*-*-hpux*}.
+@item -mlinker-opt
+@opindex mlinker-opt
+Enable the optimization pass in the HP-UX linker. Note this makes symbolic
+debugging impossible. It also triggers a bug in the HP-UX 8 and HP-UX 9
+linkers in which they give bogus error messages when linking some programs.
+
@item -mlong-calls
@opindex mno-long-calls
@opindex mlong-calls
@@ -25047,6 +25023,69 @@ symbol-difference or pc-relative calls should be relatively small.
However, an indirect call is used on 32-bit ELF systems in pic code
and it is quite long.
+@item -mlong-load-store
+@opindex mlong-load-store
+Generate 3-instruction load and store sequences as sometimes required by
+the HP-UX 10 linker. This is equivalent to the @samp{+k} option to
+the HP compilers.
+
+@item -mjump-in-delay
+@opindex mjump-in-delay
+This option is ignored and provided for compatibility purposes only.
+
+@item -mno-space-regs
+@opindex mno-space-regs
+@opindex mspace-regs
+Generate code that assumes the target has no space registers. This allows
+GCC to generate faster indirect calls and use unscaled index address modes.
+
+Such code is suitable for level 0 PA systems and kernels.
+
+@item -mordered
+@opindex mordered
+Assume memory references are ordered and barriers are not needed.
+
+@item -mportable-runtime
+@opindex mportable-runtime
+Use the portable calling conventions proposed by HP for ELF systems.
+
+@item -mschedule=@var{cpu-type}
+@opindex mschedule
+Schedule code according to the constraints for the machine type
+@var{cpu-type}. The choices for @var{cpu-type} are @samp{700},
+@samp{7100}, @samp{7100LC}, @samp{7200}, @samp{7300} and @samp{8000}. Refer
+to @file{/usr/lib/sched.models} on an HP-UX system to determine the
+proper scheduling option for your machine. The default scheduling is
+@samp{8000}.
+
+@item -msio
+@opindex msio
+Generate the predefine, @code{_SIO}, for server IO@. The default is
+@option{-mwsio}. This generates the predefines, @code{__hp9000s700},
+@code{__hp9000s700__} and @code{_WSIO}, for workstation IO@. These
+options are available under HP-UX and HI-UX@.
+
+@item -msoft-float
+@opindex msoft-float
+Generate output containing library calls for floating point.
+@strong{Warning:} the requisite libraries are not available for all HPPA
+targets. Normally the facilities of the machine's usual C compiler are
+used, but this cannot be done directly in cross-compilation. You must make
+your own arrangements to provide suitable library functions for
+cross-compilation.
+
+@option{-msoft-float} changes the calling convention in the output file;
+therefore, it is only useful if you compile @emph{all} of a program with
+this option. In particular, you need to compile @file{libgcc.a}, the
+library that comes with GCC, with @option{-msoft-float} in order for
+this to work.
+
+@item -msoft-mult
+@opindex msoft-mult
+Use software integer multiplication.
+
+This disables the use of the @code{xmpyu} instruction.
+
@item -munix=@var{unix-std}
@opindex march
Generate compiler predefines and select a startfile for the specified
@@ -633,7 +633,7 @@ h8300-*-linux*)
tm_file="$tm_file h8300/h8300-lib.h"
;;
hppa*64*-*-linux*)
- tmake_file="$tmake_file pa/t-linux64 pa/t-dimode"
+ tmake_file="$tmake_file pa/t-pa64-linux pa/t-dimode"
tmake_file="$tmake_file pa/t-softfp-sfdftf t-softfp"
extra_parts="crtbegin.o crtbeginS.o crtbeginT.o crtend.o crtendS.o"
;;
@@ -649,7 +649,7 @@ hppa*-*-linux*)
md_unwind_header=pa/linux-unwind.h
;;
hppa*64*-*-hpux11*)
- tmake_file="$tmake_file pa/t-hpux pa/t-pa64 pa/t-dimode"
+ tmake_file="$tmake_file pa/t-pa64-hpux pa/t-dimode"
tmake_file="$tmake_file pa/t-stublib t-libgcc-pic t-slibgcc"
# Set the libgcc version number
if test x$ac_cv_sjlj_exceptions = xyes; then
@@ -32,6 +32,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
typedef unsigned char u8;
typedef short unsigned int u16;
+typedef unsigned int u32;
#ifdef __LP64__
typedef long unsigned int u64;
#else
@@ -115,6 +116,36 @@ __kernel_cmpxchg2 (volatile void *mem, const void *oldval, const void *newval,
#define MASK_1 0xffu
#define MASK_2 0xffffu
+/* Load value with an atomic processor load if possible. */
+#define ATOMIC_LOAD(TYPE, WIDTH) \
+ static inline TYPE \
+ atomic_load_##WIDTH (volatile void *ptr) \
+ { \
+ return *(volatile TYPE *)ptr; \
+ }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_LOAD (u64, 8)
+#else
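+/* Doubleword loads done with a pair of ldw instructions are not
+   atomic.  Load the value with a floating-point fldd load and move
+   it to general registers through the stack.  */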
+static inline u64
+atomic_load_8 (volatile void *ptr)
+{
+ u64 result;
+ double tmp;
+
+ asm volatile ("{fldds|fldd} 0(%2),%1\n\t"
+ "{fstds|fstd} %1,-16(%%sp)\n\t"
+ "{ldws|ldw} -16(%%sp),%0\n\t"
+ "{ldws|ldw} -12(%%sp),%R0"
+ : "=r" (result), "=f" (tmp) : "r" (ptr): "memory");
+ return result;
+}
+#endif
+
+ATOMIC_LOAD (u32, 4)
+ATOMIC_LOAD (u16, 2)
+ATOMIC_LOAD (u8, 1)
+
#define FETCH_AND_OP_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX) \
TYPE HIDDEN \
__sync_fetch_and_##OP##_##WIDTH (volatile void *ptr, TYPE val) \
@@ -123,7 +154,7 @@ __kernel_cmpxchg2 (volatile void *mem, const void *oldval, const void *newval,
long failure; \
\
do { \
- tmp = __atomic_load_n ((volatile TYPE *)ptr, __ATOMIC_RELAXED); \
+ tmp = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
newval = PFX_OP (tmp INF_OP val); \
failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \
} while (failure != 0); \
@@ -160,7 +191,7 @@ FETCH_AND_OP_2 (nand, ~, &, u8, 1, 0)
long failure; \
\
do { \
- tmp = __atomic_load_n ((volatile TYPE *)ptr, __ATOMIC_RELAXED); \
+ tmp = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
newval = PFX_OP (tmp INF_OP val); \
failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \
} while (failure != 0); \
@@ -197,8 +228,7 @@ OP_AND_FETCH_2 (nand, ~, &, u8, 1, 0)
long failure; \
\
do { \
- tmp = __atomic_load_n ((volatile unsigned int *)ptr, \
- __ATOMIC_RELAXED); \
+ tmp = atomic_load_4 ((volatile unsigned int *)ptr); \
failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \
} while (failure != 0); \
\
@@ -220,8 +250,7 @@ FETCH_AND_OP_WORD (nand, ~, &)
long failure; \
\
do { \
- tmp = __atomic_load_n ((volatile unsigned int *)ptr, \
- __ATOMIC_RELAXED); \
+ tmp = atomic_load_4 ((volatile unsigned int *)ptr); \
failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \
} while (failure != 0); \
\
@@ -247,8 +276,7 @@ typedef unsigned char bool;
\
while (1) \
{ \
- actual_oldval = __atomic_load_n ((volatile TYPE *)ptr, \
- __ATOMIC_RELAXED); \
+ actual_oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
\
if (__builtin_expect (oldval != actual_oldval, 0)) \
return actual_oldval; \
@@ -281,8 +309,7 @@ __sync_val_compare_and_swap_4 (volatile void *ptr, unsigned int oldval,
while (1)
{
- actual_oldval = __atomic_load_n ((volatile unsigned int *)ptr,
- __ATOMIC_RELAXED);
+ actual_oldval = atomic_load_4 ((volatile unsigned int *)ptr);
if (__builtin_expect (oldval != actual_oldval, 0))
return actual_oldval;
@@ -310,8 +337,7 @@ TYPE HIDDEN \
long failure; \
\
do { \
- oldval = __atomic_load_n ((volatile TYPE *)ptr, \
- __ATOMIC_RELAXED); \
+ oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX); \
} while (failure != 0); \
\
@@ -322,14 +348,14 @@ SYNC_LOCK_TEST_AND_SET_2 (u64, 8, 3)
SYNC_LOCK_TEST_AND_SET_2 (u16, 2, 1)
SYNC_LOCK_TEST_AND_SET_2 (u8, 1, 0)
-unsigned int HIDDEN
+u32 HIDDEN
__sync_lock_test_and_set_4 (volatile void *ptr, unsigned int val)
{
long failure;
unsigned int oldval;
do {
- oldval = __atomic_load_n ((volatile unsigned int *)ptr, __ATOMIC_RELAXED);
+ oldval = atomic_load_4 ((volatile unsigned int *)ptr);
failure = __kernel_cmpxchg (ptr, oldval, val);
} while (failure != 0);
@@ -344,8 +370,7 @@ __sync_lock_test_and_set_4 (volatile void *ptr, unsigned int val)
long failure; \
\
do { \
- oldval = __atomic_load_n ((volatile TYPE *)ptr, \
- __ATOMIC_RELAXED); \
+ oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX); \
} while (failure != 0); \
}
@@ -361,7 +386,27 @@ __sync_lock_release_4 (volatile void *ptr)
unsigned int oldval;
do {
- oldval = __atomic_load_n ((volatile unsigned int *)ptr, __ATOMIC_RELAXED);
+ oldval = atomic_load_4 ((volatile unsigned int *)ptr);
failure = __kernel_cmpxchg (ptr, oldval, 0);
} while (failure != 0);
}
+
+#ifndef __LP64__
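+/* Load a doubleword atomically by doing a compare-and-exchange of the
+   current value with itself.  The atomic_loaddi expander calls
+   __sync_lock_load_8 when the processor load would not be atomic
+   (32-bit TARGET_SOFT_FLOAT).  */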
+#define SYNC_LOCK_LOAD_2(TYPE, WIDTH, INDEX) \
+ TYPE __sync_lock_load_##WIDTH (volatile void *) HIDDEN; \
+ TYPE \
+ __sync_lock_load_##WIDTH (volatile void *ptr) \
+ { \
+ TYPE oldval; \
+ long failure; \
+ \
+ do { \
+ oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
+ failure = __kernel_cmpxchg2 (ptr, &oldval, &oldval, INDEX); \
+ } while (failure != 0); \
+ \
+ return oldval; \
+ }
+
+SYNC_LOCK_LOAD_2 (u64, 8, 3)
+#endif
new file mode 100644
@@ -0,0 +1,324 @@
+/* PA-RISC sync libfunc support.
+ Copyright (C) 2008-2023 Free Software Foundation, Inc.
+ Based on code contributed by CodeSourcery for ARM EABI Linux.
+ Modifications for PA Linux by Helge Deller <deller@gmx.de>
+ Revised for general use by John David Anglin <danglin@gcc.gnu.org>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+typedef unsigned char u8;
+typedef short unsigned int u16;
+typedef unsigned int u32;
+#ifdef __LP64__
+typedef long unsigned int u64;
+#else
+typedef long long unsigned int u64;
+#endif
+
+/* PA-RISC 2.0 supports out-of-order execution for loads and stores.
+   Thus, we need to synchronize memory accesses. For more info, see:
+ "Advanced Performance Features of the 64-bit PA-8000" by Doug Hunt. */
+
+typedef volatile int __attribute__((aligned (16))) ldcw_t;
+static ldcw_t __atomicity_lock = 1;
+
+/* We want default visibility for the sync routines. */
+#undef VISIBILITY
+#if defined(__hpux__) && !defined(__LP64__)
+#define VISIBILITY
+#else
+#define VISIBILITY __attribute__ ((visibility ("default")))
+#endif
+
+/* Perform ldcw operation in cache when possible. The ldcw instruction
+ is a full barrier. */
+#ifndef _PA_LDCW_INSN
+# ifdef _PA_RISC2_0
+# define _PA_LDCW_INSN "ldcw,co"
+# else
+# define _PA_LDCW_INSN "ldcw"
+# endif
+#endif
+
+static inline void
+__sync_spin_lock (void)
+{
+ ldcw_t *lock = &__atomicity_lock;
+ int tmp;
+
+ __asm__ __volatile__ (_PA_LDCW_INSN " 0(%1),%0\n\t"
+ "cmpib,<>,n 0,%0,.+20\n\t"
+ "ldw,ma 0(%1),%0\n\t"
+ "cmpib,<> 0,%0,.-12\n\t"
+ "nop\n\t"
+ "b,n .-12"
+ : "=&r" (tmp)
+ : "r" (lock)
+ : "memory");
+}
+
+static inline void
+__sync_spin_unlock (void)
+{
+ ldcw_t *lock = &__atomicity_lock;
+ int tmp = 1;
+
+ /* Use ordered store for release. */
+ __asm__ __volatile__ ("stw,ma %1,0(%0)"
+ : : "r" (lock), "r" (tmp) : "memory");
+}
+
+/* Load value with an atomic processor load if possible. */
+#define ATOMIC_LOAD(TYPE, WIDTH) \
+ static inline TYPE \
+ atomic_load_##WIDTH (volatile void *ptr) \
+ { \
+ return *(volatile TYPE *)ptr; \
+ }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_LOAD (u64, 8)
+#else
+static inline u64
+atomic_load_8 (volatile void *ptr)
+{
+ u64 result;
+ double tmp;
+
+ asm volatile ("{fldds|fldd} 0(%2),%1\n\t"
+ "{fstds|fstd} %1,-16(%%sp)\n\t"
+ "{ldws|ldw} -16(%%sp),%0\n\t"
+ "{ldws|ldw} -12(%%sp),%R0"
+ : "=r" (result), "=f" (tmp) : "r" (ptr): "memory");
+ return result;
+}
+#endif
+
+ATOMIC_LOAD (u32, 4)
+ATOMIC_LOAD (u16, 2)
+ATOMIC_LOAD (u8, 1)
+
+/* Store value with an atomic processor store if possible. */
+#define ATOMIC_STORE(TYPE, WIDTH) \
+ static inline void \
+ atomic_store_##WIDTH (volatile void *ptr, TYPE value) \
+ { \
+ *(volatile TYPE *)ptr = value; \
+ }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_STORE (u64, 8)
+#else
+static inline void
+atomic_store_8 (volatile void *ptr, u64 value)
+{
+ double tmp;
+
+  asm volatile ("{stws|stw} %2,-16(%%sp)\n\t"
+ "{stws|stw} %R2,-12(%%sp)\n\t"
+ "{fldds|fldd} -16(%%sp),%1\n\t"
+ "{fstds|fstd} %1,0(%0)"
+ : "=m" (ptr), "=&f" (tmp) : "r" (value): "memory");
+}
+#endif
+
+ATOMIC_STORE (u32, 4)
+ATOMIC_STORE (u16, 2)
+ATOMIC_STORE (u8, 1)
+
+#define FETCH_AND_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH) \
+ TYPE VISIBILITY \
+ __sync_fetch_and_##OP##_##WIDTH (volatile void *ptr, TYPE val) \
+ { \
+ TYPE tmp, newval; \
+ \
+ __sync_spin_lock(); \
+ tmp = atomic_load_##WIDTH (ptr); \
+ newval = PFX_OP (tmp INF_OP val); \
+ atomic_store_##WIDTH (ptr, newval); \
+ __sync_spin_unlock(); \
+ \
+ return tmp; \
+ }
+
+FETCH_AND_OP (add, , +, u64, 8)
+FETCH_AND_OP (sub, , -, u64, 8)
+FETCH_AND_OP (or, , |, u64, 8)
+FETCH_AND_OP (and, , &, u64, 8)
+FETCH_AND_OP (xor, , ^, u64, 8)
+FETCH_AND_OP (nand, ~, &, u64, 8)
+
+FETCH_AND_OP (add, , +, u32, 4)
+FETCH_AND_OP (sub, , -, u32, 4)
+FETCH_AND_OP (or, , |, u32, 4)
+FETCH_AND_OP (and, , &, u32, 4)
+FETCH_AND_OP (xor, , ^, u32, 4)
+FETCH_AND_OP (nand, ~, &, u32, 4)
+
+FETCH_AND_OP (add, , +, u16, 2)
+FETCH_AND_OP (sub, , -, u16, 2)
+FETCH_AND_OP (or, , |, u16, 2)
+FETCH_AND_OP (and, , &, u16, 2)
+FETCH_AND_OP (xor, , ^, u16, 2)
+FETCH_AND_OP (nand, ~, &, u16, 2)
+
+FETCH_AND_OP (add, , +, u8, 1)
+FETCH_AND_OP (sub, , -, u8, 1)
+FETCH_AND_OP (or, , |, u8, 1)
+FETCH_AND_OP (and, , &, u8, 1)
+FETCH_AND_OP (xor, , ^, u8, 1)
+FETCH_AND_OP (nand, ~, &, u8, 1)
+
+#define OP_AND_FETCH(OP, PFX_OP, INF_OP, TYPE, WIDTH) \
+ TYPE VISIBILITY \
+ __sync_##OP##_and_fetch_##WIDTH (volatile void *ptr, TYPE val) \
+ { \
+ TYPE tmp, newval; \
+ \
+ __sync_spin_lock(); \
+ tmp = atomic_load_##WIDTH (ptr); \
+ newval = PFX_OP (tmp INF_OP val); \
+ atomic_store_##WIDTH (ptr, newval); \
+ __sync_spin_unlock(); \
+ \
+ return newval; \
+ }
+
+OP_AND_FETCH (add, , +, u64, 8)
+OP_AND_FETCH (sub, , -, u64, 8)
+OP_AND_FETCH (or, , |, u64, 8)
+OP_AND_FETCH (and, , &, u64, 8)
+OP_AND_FETCH (xor, , ^, u64, 8)
+OP_AND_FETCH (nand, ~, &, u64, 8)
+
+OP_AND_FETCH (add, , +, u32, 4)
+OP_AND_FETCH (sub, , -, u32, 4)
+OP_AND_FETCH (or, , |, u32, 4)
+OP_AND_FETCH (and, , &, u32, 4)
+OP_AND_FETCH (xor, , ^, u32, 4)
+OP_AND_FETCH (nand, ~, &, u32, 4)
+
+OP_AND_FETCH (add, , +, u16, 2)
+OP_AND_FETCH (sub, , -, u16, 2)
+OP_AND_FETCH (or, , |, u16, 2)
+OP_AND_FETCH (and, , &, u16, 2)
+OP_AND_FETCH (xor, , ^, u16, 2)
+OP_AND_FETCH (nand, ~, &, u16, 2)
+
+OP_AND_FETCH (add, , +, u8, 1)
+OP_AND_FETCH (sub, , -, u8, 1)
+OP_AND_FETCH (or, , |, u8, 1)
+OP_AND_FETCH (and, , &, u8, 1)
+OP_AND_FETCH (xor, , ^, u8, 1)
+OP_AND_FETCH (nand, ~, &, u8, 1)
+
+#define COMPARE_AND_SWAP(TYPE, WIDTH) \
+ TYPE VISIBILITY \
+ __sync_val_compare_and_swap_##WIDTH (volatile void *ptr, TYPE oldval, \
+ TYPE newval) \
+ { \
+ TYPE actual_oldval; \
+ \
+ __sync_spin_lock(); \
+ actual_oldval = atomic_load_##WIDTH (ptr); \
+ if (actual_oldval == oldval) \
+ atomic_store_##WIDTH (ptr, newval); \
+ __sync_spin_unlock(); \
+ \
+ return actual_oldval; \
+ } \
+ \
+ _Bool VISIBILITY \
+ __sync_bool_compare_and_swap_##WIDTH (volatile void *ptr, \
+ TYPE oldval, TYPE newval) \
+ { \
+ TYPE actual_oldval; \
+ _Bool result; \
+ \
+ __sync_spin_lock(); \
+ actual_oldval = atomic_load_##WIDTH (ptr); \
+ result = (actual_oldval == oldval); \
+ if (result) \
+ atomic_store_##WIDTH (ptr, newval); \
+ __sync_spin_unlock(); \
+ \
+ return result; \
+ }
+
+COMPARE_AND_SWAP (u64, 8)
+COMPARE_AND_SWAP (u32, 4)
+COMPARE_AND_SWAP (u16, 2)
+COMPARE_AND_SWAP (u8, 1)
+
+#define SYNC_LOCK_TEST_AND_SET(TYPE, WIDTH) \
+TYPE VISIBILITY \
+ __sync_lock_test_and_set_##WIDTH (volatile void *ptr, TYPE val) \
+ { \
+ TYPE oldval; \
+ \
+ __sync_spin_lock(); \
+ oldval = atomic_load_##WIDTH (ptr); \
+ atomic_store_##WIDTH (ptr, val); \
+ __sync_spin_unlock(); \
+ \
+ return oldval; \
+ }
+
+SYNC_LOCK_TEST_AND_SET (u64, 8)
+SYNC_LOCK_TEST_AND_SET (u32, 4)
+SYNC_LOCK_TEST_AND_SET (u16, 2)
+SYNC_LOCK_TEST_AND_SET (u8, 1)
+
+#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \
+ void VISIBILITY \
+ __sync_lock_release_##WIDTH (volatile void *ptr) \
+ { \
+ TYPE val = 0; \
+ \
+ __sync_spin_lock(); \
+ atomic_store_##WIDTH (ptr, val); \
+ __sync_spin_unlock(); \
+ }
+
+SYNC_LOCK_RELEASE (u64, 8)
+SYNC_LOCK_RELEASE (u32, 4)
+SYNC_LOCK_RELEASE (u16, 2)
+SYNC_LOCK_RELEASE (u8, 1)
+
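+/* Load a value while holding the lock.  __sync_lock_load_8 is used by
+   the atomic_loaddi expander when the processor load would not be
+   atomic.  */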
+#define SYNC_LOCK_LOAD(TYPE, WIDTH) \
+TYPE VISIBILITY __sync_lock_load_##WIDTH (volatile void *); \
+TYPE VISIBILITY \
+ __sync_lock_load_##WIDTH (volatile void *ptr) \
+ { \
+ TYPE oldval; \
+ \
+ __sync_spin_lock(); \
+ oldval = atomic_load_##WIDTH (ptr); \
+ __sync_spin_unlock(); \
+ \
+ return oldval; \
+ }
+
+SYNC_LOCK_LOAD (u64, 8)
+SYNC_LOCK_LOAD (u32, 4)
+SYNC_LOCK_LOAD (u16, 2)
+SYNC_LOCK_LOAD (u8, 1)
@@ -7,3 +7,4 @@ LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
HOST_LIBGCC2_CFLAGS += -DELF=1 -DLINUX=1
LIB2ADD = $(srcdir)/config/pa/fptr.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
@@ -7,3 +7,4 @@ LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
HOST_LIBGCC2_CFLAGS += -DELF=1 -DLINUX=1
LIB2ADD = $(srcdir)/config/pa/fptr.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
new file mode 100644
@@ -0,0 +1,4 @@
+LIB2ADD = $(srcdir)/config/pa/quadlib.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
+
+HOST_LIBGCC2_CFLAGS += -frandom-seed=fixed-seed -Dpa64=1 -DELF=1 -mlong-calls
new file mode 100644
@@ -0,0 +1,8 @@
+# Plug millicode routines into libgcc.a.  We want these on both native and
+# cross compiles.
+LIB1ASMSRC = pa/milli64.S
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI
+
+HOST_LIBGCC2_CFLAGS += -Dpa64=1 -DELF=1 -DLINUX=1
+
+LIB2ADD_ST = $(srcdir)/config/pa/linux-atomic.c