libatomic: Add support for LSE and LSE2
Checks
Commit Message
Add support for AArch64 LSE and LSE2 to libatomic. Disable outline atomics,
and use LSE ifuncs for 1-8 byte atomics and LSE2 ifuncs for 16-byte atomics.
On Neoverse V1, 16-byte atomics are ~4x faster due to avoiding locks.
Note this is safe since we swap all 16-byte atomics using the same ifunc,
so they either use locks or LSE2 atomics, but never a mix. This also improves
ABI compatibility with LLVM: its inlined 16-byte atomics are compatible with
the new libatomic if LSE2 is supported.
Passes regress, OK for commit?
libatomic/
Makefile.in: Regenerated with automake 1.15.1.
Makefile.am: Add atomic_16.S for AArch64.
configure.tgt: Disable outline atomics in AArch64 build.
config/linux/aarch64/atomic_16.S: New file - implementation of
ifuncs for 128-bit atomics.
config/linux/aarch64/host-config.h: Enable ifuncs, use LSE (HWCAP_ATOMICS)
for 1-8-byte atomics and LSE2 (HWCAP_USCAT) for 16-byte atomics.
---
Comments
Wilco Dijkstra via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Add support for AArch64 LSE and LSE2 to libatomic. Disable outline atomics,
> and use LSE ifuncs for 1-8 byte atomics and LSE2 ifuncs for 16-byte atomics.
> On Neoverse V1, 16-byte atomics are ~4x faster due to avoiding locks.
>
> Note this is safe since we swap all 16-byte atomics using the same ifunc,
> so they either use locks or LSE2 atomics, but never a mix. This also improves
> ABI compatibility with LLVM: its inlined 16-byte atomics are compatible with
> the new libatomic if LSE2 is supported.
>
> Passes regress, OK for commit?
>
> libatomic/
> Makefile.in: Regenerated with automake 1.15.1.
> Makefile.am: Add atomic_16.S for AArch64.
> configure.tgt: Disable outline atomics in AArch64 build.
> config/linux/aarch64/atomic_16.S: New file - implementation of
> ifuncs for 128-bit atomics.
> config/linux/aarch64/host-config.h: Enable ifuncs, use LSE (HWCAP_ATOMICS)
> for 1-8-byte atomics and LSE2 (HWCAP_USCAT) for 16-byte atomics.
>
> ---
> diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
> index d88515e4a03bd812334ae0b7bf4c0bba119455dc..41e5da28512150780a2018386e22b4e70afcfa3f 100644
> --- a/libatomic/Makefile.am
> +++ b/libatomic/Makefile.am
> @@ -127,6 +127,8 @@ if HAVE_IFUNC
> if ARCH_AARCH64_LINUX
> IFUNC_OPTIONS = -march=armv8-a+lse
> libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
> +libatomic_la_SOURCES += atomic_16.S
> +
> endif
> if ARCH_ARM_LINUX
> IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
> diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
> index 80d25653dc75cca995c8b0b2107a55f1234a6d52..89e29fc60a7fb74341b2f0f805e461847073082c 100644
> --- a/libatomic/Makefile.in
> +++ b/libatomic/Makefile.in
> @@ -90,13 +90,14 @@ build_triplet = @build@
> host_triplet = @host@
> target_triplet = @target@
> @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
> -@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = $(foreach \
> +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S
> +@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \
> @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ s,$(SIZES),$(addsuffix \
> @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _$(s)_1_.lo,$(SIZEOBJS))) \
> @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
> @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS))
> -@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
> -@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
> +@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
> +@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
> @ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix _16_2_.lo,$(SIZEOBJS))
>
> subdir = .
> @@ -154,8 +155,11 @@ am__uninstall_files_from_dir = { \
> }
> am__installdirs = "$(DESTDIR)$(toolexeclibdir)"
> LTLIBRARIES = $(noinst_LTLIBRARIES) $(toolexeclib_LTLIBRARIES)
> +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__objects_1 = \
> +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@ atomic_16.lo
> am_libatomic_la_OBJECTS = gload.lo gstore.lo gcas.lo gexch.lo \
> - glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo
> + glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo \
> + $(am__objects_1)
> libatomic_la_OBJECTS = $(am_libatomic_la_OBJECTS)
> AM_V_lt = $(am__v_lt_@AM_V@)
> am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
> @@ -165,9 +169,9 @@ libatomic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
> $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
> $(libatomic_la_LDFLAGS) $(LDFLAGS) -o $@
> libatomic_convenience_la_DEPENDENCIES = $(libatomic_la_LIBADD)
> -am__objects_1 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
> - init.lo fenv.lo fence.lo flag.lo
> -am_libatomic_convenience_la_OBJECTS = $(am__objects_1)
> +am__objects_2 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
> + init.lo fenv.lo fence.lo flag.lo $(am__objects_1)
> +am_libatomic_convenience_la_OBJECTS = $(am__objects_2)
> libatomic_convenience_la_OBJECTS = \
> $(am_libatomic_convenience_la_OBJECTS)
> AM_V_P = $(am__v_P_@AM_V@)
> @@ -185,6 +189,16 @@ am__v_at_1 =
> depcomp = $(SHELL) $(top_srcdir)/../depcomp
> am__depfiles_maybe = depfiles
> am__mv = mv -f
> +CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
> + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
> +LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \
> + $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \
> + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
> + $(AM_CCASFLAGS) $(CCASFLAGS)
> +AM_V_CPPAS = $(am__v_CPPAS_@AM_V@)
> +am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@)
> +am__v_CPPAS_0 = @echo " CPPAS " $@;
> +am__v_CPPAS_1 =
> COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
> $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
> LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
> @@ -369,6 +383,7 @@ pdfdir = @pdfdir@
> prefix = @prefix@
> program_transform_name = @program_transform_name@
> psdir = @psdir@
> +runstatedir = @runstatedir@
> sbindir = @sbindir@
> sharedstatedir = @sharedstatedir@
> srcdir = @srcdir@
> @@ -404,9 +419,8 @@ noinst_LTLIBRARIES = libatomic_convenience.la
> @LIBAT_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBAT_BUILD_VERSIONED_SHLIB_TRUE@libatomic_version_dep = libatomic.map-sun
> libatomic_version_info = -version-info $(libtool_VERSION)
> libatomic_la_LDFLAGS = $(libatomic_version_info) $(libatomic_version_script) $(lt_host_flags)
> -libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c init.c \
> - fenv.c fence.c flag.c
> -
> +libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \
> + init.c fenv.c fence.c flag.c $(am__append_2)
> SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas
> EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS))
> libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep)
> @@ -432,8 +446,8 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard $(dir)/*.c))
> # Then sort through them to find the one we want, and select the first.
> M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files)))
> libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
> - _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_2) \
> - $(am__append_3) $(am__append_4)
> + _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \
> + $(am__append_4) $(am__append_5)
> @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
> @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
> @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
> @@ -450,7 +464,7 @@ all: auto-config.h
> $(MAKE) $(AM_MAKEFLAGS) all-recursive
>
> .SUFFIXES:
> -.SUFFIXES: .c .lo .o .obj
> +.SUFFIXES: .S .c .lo .o .obj
> am--refresh: Makefile
> @:
> $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/../multilib.am $(am__configure_deps)
> @@ -559,6 +573,7 @@ mostlyclean-compile:
> distclean-compile:
> -rm -f *.tab.c
>
> +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic_16.Plo@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fence.Plo@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fenv.Plo@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flag.Plo@am__quote@
> @@ -570,6 +585,27 @@ distclean-compile:
> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Plo@am__quote@
> @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
>
> +.S.o:
> +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
> +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
> +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
> +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
> +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $<
> +
> +.S.obj:
> +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
> +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
> +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
> +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
> +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
> +
> +.S.lo:
> +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
> +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
> +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
> +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
> +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $<
> +
> .c.o:
> @am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
> @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
> diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
> new file mode 100644
> index 0000000000000000000000000000000000000000..5f23dba4529528c39425221402323d07a14cc518
> --- /dev/null
> +++ b/libatomic/config/linux/aarch64/atomic_16.S
> @@ -0,0 +1,422 @@
> +/* Copyright (C) 2022 Free Software Foundation, Inc.
> +
> + This file is part of the GNU Atomic Library (libatomic).
> +
> + Libatomic is free software; you can redistribute it and/or modify it
> + under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3 of the License, or
> + (at your option) any later version.
> +
> + Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
> + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> + FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + more details.
> +
> + Under Section 7 of GPL version 3, you are granted additional
> + permissions described in the GCC Runtime Library Exception, version
> + 3.1, as published by the Free Software Foundation.
> +
> + You should have received a copy of the GNU General Public License and
> + a copy of the GCC Runtime Library Exception along with this program;
> + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +
> + .arch armv8-a+lse
> +
> +#define ENTRY(name) \
> + .global name; \
> + .hidden name; \
> + .type name,%function; \
> + .p2align 4; \
> +name: \
> + .cfi_startproc; \
> + hint 34 // bti c
> +
> +#define END(name) \
> + .cfi_endproc; \
> + .size name, .-name;
> +
> +#define res0 x0
> +#define res1 x1
> +#define in0 x2
> +#define in1 x3
> +#define tmp0 x6
> +#define tmp1 x7
> +#define exp0 x8
> +#define exp1 x9
> +
> +#ifdef __AARCH64EB__
> +# define reslo x1
> +# define reshi x0
> +# define inlo x3
> +# define inhi x2
> +# define tmplo x7
> +# define tmphi x6
> +#else
> +# define reslo x0
> +# define reshi x1
> +# define inlo x2
> +# define inhi x3
> +# define tmplo x6
> +# define tmphi x7
> +#endif
> +
> +#define RELAXED 0
> +#define CONSUME 1
> +#define ACQUIRE 2
> +#define RELEASE 3
> +#define ACQ_REL 4
> +#define SEQ_CST 5
> +
> +
> +ENTRY (libat_load_16_i1)
> + cbnz w1, 1f
> + ldp res0, res1, [x0]
> + ret
> +1:
> + cmp w1, ACQUIRE
> + b.hi 2f
> + ldp res0, res1, [x0]
> + dmb ishld
> + ret
> +2:
> + ldp res0, res1, [x0]
> + dmb ish
> + ret
> +END (libat_load_16_i1)
> +
> +
> +ENTRY (libat_store_16_i1)
> + cbnz w4, 1f
> + stp in0, in1, [x0]
> + ret
> +1:
> + dmb ish
> + stp in0, in1, [x0]
> + cmp w4, SEQ_CST
> + beq 2f
> + ret
> +2:
> + dmb ish
> + ret
> +END (libat_store_16_i1)
> +
> +
> +ENTRY (libat_exchange_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + stxp w4, in0, in1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + cmp w4, ACQUIRE
> + b.hi 4f
> +3:
> + ldaxp res0, res1, [x5]
> + stxp w4, in0, in1, [x5]
> + cbnz w4, 3b
> + ret
> +4:
> + cmp w4, RELEASE
> + b.ne 6f
> +5:
> + ldxp res0, res1, [x5]
> + stlxp w4, in0, in1, [x5]
> + cbnz w4, 5b
> + ret
> +6:
> + ldaxp res0, res1, [x5]
> + stlxp w4, in0, in1, [x5]
> + cbnz w4, 6b
> + ret
> +END (libat_exchange_16_i1)
> +
> +
> +ENTRY (libat_compare_exchange_16_i1)
> + ldp exp0, exp1, [x1]
> + mov tmp0, exp0
> + mov tmp1, exp1
> + cbz w5, 2f
> + cmp w5, RELEASE
> + b.hs 3f
> + caspa exp0, exp1, in0, in1, [x0]
> +0:
> + cmp exp0, tmp0
> + ccmp exp1, tmp1, 0, eq
> + bne 1f
> + mov x0, 1
> + ret
> +1:
> + stp exp0, exp1, [x1]
> + mov x0, 0
> + ret
> +2:
> + casp exp0, exp1, in0, in1, [x0]
> + b 0b
> +3:
> + b.hi 4f
> + caspl exp0, exp1, in0, in1, [x0]
> + b 0b
> +4:
> + caspal exp0, exp1, in0, in1, [x0]
> + b 0b
> +END (libat_compare_exchange_16_i1)
As discussed off-list, it looks like this function should use w4 rather
than w5. OK with that change, thanks.
Obviously completely separate work, but it would be nice to teach gcc to
use ORN for the inline nand expansion. Maybe that's not heavily used though.
Richard
> +ENTRY (libat_fetch_add_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + adds tmplo, reslo, inlo
> + adc tmphi, reshi, inhi
> + stxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + adds tmplo, reslo, inlo
> + adc tmphi, reshi, inhi
> + stlxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_fetch_add_16_i1)
> +
> +
> +ENTRY (libat_add_fetch_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + adds reslo, reslo, inlo
> + adc reshi, reshi, inhi
> + stxp w4, res0, res1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + adds reslo, reslo, inlo
> + adc reshi, reshi, inhi
> + stlxp w4, res0, res1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_add_fetch_16_i1)
> +
> +
> +ENTRY (libat_fetch_sub_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + subs tmplo, reslo, inlo
> + sbc tmphi, reshi, inhi
> + stxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + subs tmplo, reslo, inlo
> + sbc tmphi, reshi, inhi
> + stlxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_fetch_sub_16_i1)
> +
> +
> +ENTRY (libat_sub_fetch_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + subs reslo, reslo, inlo
> + sbc reshi, reshi, inhi
> + stxp w4, res0, res1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + subs reslo, reslo, inlo
> + sbc reshi, reshi, inhi
> + stlxp w4, res0, res1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_sub_fetch_16_i1)
> +
> +
> +ENTRY (libat_fetch_or_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + orr tmp0, res0, in0
> + orr tmp1, res1, in1
> + stxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + orr tmp0, res0, in0
> + orr tmp1, res1, in1
> + stlxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_fetch_or_16_i1)
> +
> +
> +ENTRY (libat_or_fetch_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + orr res0, res0, in0
> + orr res1, res1, in1
> + stxp w4, res0, res1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + orr res0, res0, in0
> + orr res1, res1, in1
> + stlxp w4, res0, res1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_or_fetch_16_i1)
> +
> +
> +ENTRY (libat_fetch_and_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + and tmp0, res0, in0
> + and tmp1, res1, in1
> + stxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + and tmp0, res0, in0
> + and tmp1, res1, in1
> + stlxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_fetch_and_16_i1)
> +
> +
> +ENTRY (libat_and_fetch_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + and res0, res0, in0
> + and res1, res1, in1
> + stxp w4, res0, res1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + and res0, res0, in0
> + and res1, res1, in1
> + stlxp w4, res0, res1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_and_fetch_16_i1)
> +
> +
> +ENTRY (libat_fetch_xor_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + eor tmp0, res0, in0
> + eor tmp1, res1, in1
> + stxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + eor tmp0, res0, in0
> + eor tmp1, res1, in1
> + stlxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_fetch_xor_16_i1)
> +
> +
> +ENTRY (libat_xor_fetch_16_i1)
> + mov x5, x0
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + eor res0, res0, in0
> + eor res1, res1, in1
> + stxp w4, res0, res1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + eor res0, res0, in0
> + eor res1, res1, in1
> + stlxp w4, res0, res1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_xor_fetch_16_i1)
> +
> +
> +ENTRY (libat_fetch_nand_16_i1)
> + mov x5, x0
> + mvn in0, in0
> + mvn in1, in1
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + orn tmp0, in0, res0
> + orn tmp1, in1, res1
> + stxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + orn tmp0, in0, res0
> + orn tmp1, in1, res1
> + stlxp w4, tmp0, tmp1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_fetch_nand_16_i1)
> +
> +
> +ENTRY (libat_nand_fetch_16_i1)
> + mov x5, x0
> + mvn in0, in0
> + mvn in1, in1
> + cbnz w4, 2f
> +1:
> + ldxp res0, res1, [x5]
> + orn res0, in0, res0
> + orn res1, in1, res1
> + stxp w4, res0, res1, [x5]
> + cbnz w4, 1b
> + ret
> +2:
> + ldaxp res0, res1, [x5]
> + orn res0, in0, res0
> + orn res1, in1, res1
> + stlxp w4, res0, res1, [x5]
> + cbnz w4, 2b
> + ret
> +END (libat_nand_fetch_16_i1)
> +
> +
> +ENTRY (libat_test_and_set_16_i1)
> + mov w2, 1
> + cbnz w1, 2f
> + swpb w0, w2, [x0]
> + ret
> +
> +2: swpalb w0, w2, [x0]
> + ret
> +END (libat_test_and_set_16_i1)
> +
> diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h
> index 769ba6edc600099122b03af754cbbb079134596a..d9b5ab31bc85cfe1d5f3773c42442e408b174cbc 100644
> --- a/libatomic/config/linux/aarch64/host-config.h
> +++ b/libatomic/config/linux/aarch64/host-config.h
> @@ -22,14 +22,22 @@
> <http://www.gnu.org/licenses/>. */
>
> #if HAVE_IFUNC
> -#include <stdlib.h>
> +#include <sys/auxv.h>
>
> -# ifdef HWCAP_ATOMICS
> -# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
> +#ifdef HWCAP_USCAT
> +# if N == 16
> +# define IFUNC_COND_1 (hwcap & HWCAP_USCAT)
> # else
> -# define IFUNC_COND_1 (false)
> +# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
> # endif
> -# define IFUNC_NCOND(N) (1)
> +#else
> +# define IFUNC_COND_1 (false)
> +#endif
> +#define IFUNC_NCOND(N) (1)
> +
> +#if N == 16 && IFUNC_ALT != 0
> +# define DONE 1
> +#endif
>
> #endif /* HAVE_IFUNC */
>
> diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
> index 33f8c91ce7718336b05e1077d3e91feb5b706730..113420f7beca143b5040fc9eb871461c2163ae44 100644
> --- a/libatomic/configure.tgt
> +++ b/libatomic/configure.tgt
> @@ -49,6 +49,7 @@ case "${target_cpu}" in
> fi
> ;;
> esac
> + XCFLAGS="${XCFLAGS} -mno-outline-atomics"
> ;;
> arm*)
> ARCH=arm
@@ -127,6 +127,8 @@ if HAVE_IFUNC
if ARCH_AARCH64_LINUX
IFUNC_OPTIONS = -march=armv8-a+lse
libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
+libatomic_la_SOURCES += atomic_16.S
+
endif
if ARCH_ARM_LINUX
IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
@@ -90,13 +90,14 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
-@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = $(foreach \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S
+@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ s,$(SIZES),$(addsuffix \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _$(s)_1_.lo,$(SIZEOBJS))) \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS))
-@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix _16_2_.lo,$(SIZEOBJS))
subdir = .
@@ -154,8 +155,11 @@ am__uninstall_files_from_dir = { \
}
am__installdirs = "$(DESTDIR)$(toolexeclibdir)"
LTLIBRARIES = $(noinst_LTLIBRARIES) $(toolexeclib_LTLIBRARIES)
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__objects_1 = \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@ atomic_16.lo
am_libatomic_la_OBJECTS = gload.lo gstore.lo gcas.lo gexch.lo \
- glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo
+ glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo \
+ $(am__objects_1)
libatomic_la_OBJECTS = $(am_libatomic_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -165,9 +169,9 @@ libatomic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(libatomic_la_LDFLAGS) $(LDFLAGS) -o $@
libatomic_convenience_la_DEPENDENCIES = $(libatomic_la_LIBADD)
-am__objects_1 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
- init.lo fenv.lo fence.lo flag.lo
-am_libatomic_convenience_la_OBJECTS = $(am__objects_1)
+am__objects_2 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
+ init.lo fenv.lo fence.lo flag.lo $(am__objects_1)
+am_libatomic_convenience_la_OBJECTS = $(am__objects_2)
libatomic_convenience_la_OBJECTS = \
$(am_libatomic_convenience_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
@@ -185,6 +189,16 @@ am__v_at_1 =
depcomp = $(SHELL) $(top_srcdir)/../depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CCASFLAGS) $(CCASFLAGS)
+AM_V_CPPAS = $(am__v_CPPAS_@AM_V@)
+am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@)
+am__v_CPPAS_0 = @echo " CPPAS " $@;
+am__v_CPPAS_1 =
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
@@ -369,6 +383,7 @@ pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
+runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
@@ -404,9 +419,8 @@ noinst_LTLIBRARIES = libatomic_convenience.la
@LIBAT_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBAT_BUILD_VERSIONED_SHLIB_TRUE@libatomic_version_dep = libatomic.map-sun
libatomic_version_info = -version-info $(libtool_VERSION)
libatomic_la_LDFLAGS = $(libatomic_version_info) $(libatomic_version_script) $(lt_host_flags)
-libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c init.c \
- fenv.c fence.c flag.c
-
+libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \
+ init.c fenv.c fence.c flag.c $(am__append_2)
SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas
EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS))
libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep)
@@ -432,8 +446,8 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard $(dir)/*.c))
# Then sort through them to find the one we want, and select the first.
M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files)))
libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
- _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_2) \
- $(am__append_3) $(am__append_4)
+ _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \
+ $(am__append_4) $(am__append_5)
@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
@@ -450,7 +464,7 @@ all: auto-config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
+.SUFFIXES: .S .c .lo .o .obj
am--refresh: Makefile
@:
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/../multilib.am $(am__configure_deps)
@@ -559,6 +573,7 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic_16.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fence.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fenv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flag.Plo@am__quote@
@@ -570,6 +585,27 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
+.S.o:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.S.lo:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $<
+
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
new file mode 100644
@@ -0,0 +1,422 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+ This file is part of the GNU Atomic Library (libatomic).
+
+ Libatomic is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+ .arch armv8-a+lse
+
+#define ENTRY(name) \
+ .global name; \
+ .hidden name; /* Hidden ELF visibility.  */ \
+ .type name,%function; \
+ .p2align 4; /* Align the entry point to 2^4 = 16 bytes.  */ \
+name: \
+ .cfi_startproc; \
+ hint 34 // bti c
+
+#define END(name) \
+ .cfi_endproc; \
+ .size name, .-name; /* Symbol size = bytes emitted since the NAME label.  */
+
+#define res0 x0 /* res0/res1: register pair holding the 16-byte result.  */
+#define res1 x1
+#define in0 x2 /* in0/in1: register pair holding the 16-byte input operand.  */
+#define in1 x3
+#define tmp0 x6 /* tmp0/tmp1: scratch pair.  */
+#define tmp1 x7
+#define exp0 x8 /* exp0/exp1: pair holding the expected value for compare-exchange.  */
+#define exp1 x9
+
+#ifdef __AARCH64EB__ /* Big-endian: the low 64 bits live in the second register of each pair.  */
+# define reslo x1
+# define reshi x0
+# define inlo x3
+# define inhi x2
+# define tmplo x7
+# define tmphi x6
+#else /* Little-endian: the low 64 bits live in the first register of each pair.  */
+# define reslo x0
+# define reshi x1
+# define inlo x2
+# define inhi x3
+# define tmplo x6
+# define tmphi x7
+#endif
+
+#define RELAXED 0 /* Memory-order values compared against the order argument.  */
+#define CONSUME 1
+#define ACQUIRE 2
+#define RELEASE 3
+#define ACQ_REL 4
+#define SEQ_CST 5
+
+/* 16-byte atomic load.
+ In: x0 = address of the 16-byte object, w1 = memory order.
+ Out: x0/x1 = value loaded.  */
+ENTRY (libat_load_16_i1)
+ cbnz w1, 1f
+ ldp res0, res1, [x0] /* RELAXED: load pair, no barrier.  */
+ ret
+1:
+ cmp w1, ACQUIRE
+ b.hi 2f
+ ldp res0, res1, [x0] /* CONSUME/ACQUIRE.  */
+ dmb ishld /* Order the load before all later accesses.  */
+ ret
+2:
+ /* SEQ_CST: without a barrier in front of it the LDP could be reordered
+ ahead of an earlier Store-Release, which sequential consistency
+ forbids.  A Load-Acquire of the same location blocks that.  */
+ ldar tmp0, [x0] /* Value is discarded; only the ordering matters.  */
+ ldp res0, res1, [x0]
+ dmb ishld
+ ret
+END (libat_load_16_i1)
+
+
+ENTRY (libat_store_16_i1) /* In: x0 = destination address, x2/x3 = value to store, w4 = memory order.  */
+ cbnz w4, 1f /* Any order other than RELAXED (0) takes the barrier path.  */
+ stp in0, in1, [x0] /* RELAXED: plain store pair.  */
+ ret
+1:
+ dmb ish /* Barrier ahead of the store.  */
+ stp in0, in1, [x0]
+ cmp w4, SEQ_CST
+ beq 2f /* Only SEQ_CST adds a second barrier after the store.  */
+ ret
+2:
+ dmb ish
+ ret
+END (libat_store_16_i1)
+
+
+ENTRY (libat_exchange_16_i1) /* In: x0 = address, x2/x3 = new value, w4 = memory order.  Out: x0/x1 = previous value.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ stxp w4, in0, in1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2:
+ cmp w4, ACQUIRE
+ b.hi 4f
+3: /* CONSUME/ACQUIRE: acquire on the load only.  */
+ ldaxp res0, res1, [x5]
+ stxp w4, in0, in1, [x5]
+ cbnz w4, 3b
+ ret
+4:
+ cmp w4, RELEASE /* w4 still holds the order here; no store has clobbered it yet.  */
+ b.ne 6f
+5: /* RELEASE: release on the store only.  */
+ ldxp res0, res1, [x5]
+ stlxp w4, in0, in1, [x5]
+ cbnz w4, 5b
+ ret
+6: /* ACQ_REL/SEQ_CST: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ stlxp w4, in0, in1, [x5]
+ cbnz w4, 6b
+ ret
+END (libat_exchange_16_i1)
+
+
+/* 16-byte atomic compare-and-exchange using CASP.
+ In: x0 = address of the object, x1 = address of the expected value,
+ x2/x3 = desired value, w4 = success memory order, w5 = failure order.
+ Out: x0 = 1 if the exchange happened; otherwise 0, with the value that
+ was observed written back to the expected-value slot at [x1].
+ The CASP variant is selected from the success order in w4 (the failure
+ order in w5 can never be stronger, so it is not examined).  */
+ENTRY (libat_compare_exchange_16_i1)
+ ldp exp0, exp1, [x1]
+ mov tmp0, exp0 /* Keep a copy: CASP overwrites exp0/exp1 with the old value.  */
+ mov tmp1, exp1
+ cbz w4, 2f
+ cmp w4, RELEASE
+ b.hs 3f
+ caspa exp0, exp1, in0, in1, [x0] /* CONSUME/ACQUIRE.  */
+0:
+ cmp exp0, tmp0 /* Exchange succeeded iff both halves still equal the copy.  */
+ ccmp exp1, tmp1, 0, eq
+ bne 1f
+ mov x0, 1
+ ret
+1:
+ stp exp0, exp1, [x1] /* Failure: report the value actually seen.  */
+ mov x0, 0
+ ret
+2:
+ casp exp0, exp1, in0, in1, [x0] /* RELAXED.  */
+ b 0b
+3:
+ b.hi 4f /* Flags still come from the compare against RELEASE.  */
+ caspl exp0, exp1, in0, in1, [x0] /* RELEASE.  */
+ b 0b
+4:
+ caspal exp0, exp1, in0, in1, [x0] /* ACQ_REL/SEQ_CST.  */
+ b 0b
+END (libat_compare_exchange_16_i1)
+
+
+ENTRY (libat_fetch_add_16_i1) /* In: x0 = address, x2/x3 = addend, w4 = memory order.  Out: x0/x1 = value before the add.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ adds tmplo, reslo, inlo /* Low halves first, setting the carry...  */
+ adc tmphi, reshi, inhi /* ...which is folded into the high halves.  */
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ adds tmplo, reslo, inlo
+ adc tmphi, reshi, inhi
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_add_16_i1)
+
+
+ENTRY (libat_add_fetch_16_i1) /* In: x0 = address, x2/x3 = addend, w4 = memory order.  Out: x0/x1 = value after the add.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ adds reslo, reslo, inlo /* Sum is built in the result pair so the new value is returned.  */
+ adc reshi, reshi, inhi
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ adds reslo, reslo, inlo
+ adc reshi, reshi, inhi
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_add_fetch_16_i1)
+
+
+ENTRY (libat_fetch_sub_16_i1) /* In: x0 = address, x2/x3 = subtrahend, w4 = memory order.  Out: x0/x1 = value before the subtract.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ subs tmplo, reslo, inlo /* Low halves first, setting the borrow...  */
+ sbc tmphi, reshi, inhi /* ...which is folded into the high halves.  */
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ subs tmplo, reslo, inlo
+ sbc tmphi, reshi, inhi
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_sub_16_i1)
+
+
+ENTRY (libat_sub_fetch_16_i1) /* In: x0 = address, x2/x3 = subtrahend, w4 = memory order.  Out: x0/x1 = value after the subtract.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ subs reslo, reslo, inlo /* Difference is built in the result pair so the new value is returned.  */
+ sbc reshi, reshi, inhi
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ subs reslo, reslo, inlo
+ sbc reshi, reshi, inhi
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_sub_fetch_16_i1)
+
+
+ENTRY (libat_fetch_or_16_i1) /* In: x0 = address, x2/x3 = mask, w4 = memory order.  Out: x0/x1 = value before the OR.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ orr tmp0, res0, in0 /* New value goes to the scratch pair; the old value stays in x0/x1.  */
+ orr tmp1, res1, in1
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ orr tmp0, res0, in0
+ orr tmp1, res1, in1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_or_16_i1)
+
+
+ENTRY (libat_or_fetch_16_i1) /* In: x0 = address, x2/x3 = mask, w4 = memory order.  Out: x0/x1 = value after the OR.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ orr res0, res0, in0 /* Computed in place so the new value is what gets returned.  */
+ orr res1, res1, in1
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ orr res0, res0, in0
+ orr res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_or_fetch_16_i1)
+
+
+ENTRY (libat_fetch_and_16_i1) /* In: x0 = address, x2/x3 = mask, w4 = memory order.  Out: x0/x1 = value before the AND.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ and tmp0, res0, in0 /* New value goes to the scratch pair; the old value stays in x0/x1.  */
+ and tmp1, res1, in1
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ and tmp0, res0, in0
+ and tmp1, res1, in1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_and_16_i1)
+
+
+ENTRY (libat_and_fetch_16_i1) /* In: x0 = address, x2/x3 = mask, w4 = memory order.  Out: x0/x1 = value after the AND.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ and res0, res0, in0 /* Computed in place so the new value is what gets returned.  */
+ and res1, res1, in1
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ and res0, res0, in0
+ and res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_and_fetch_16_i1)
+
+
+ENTRY (libat_fetch_xor_16_i1) /* In: x0 = address, x2/x3 = mask, w4 = memory order.  Out: x0/x1 = value before the XOR.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ eor tmp0, res0, in0 /* New value goes to the scratch pair; the old value stays in x0/x1.  */
+ eor tmp1, res1, in1
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ eor tmp0, res0, in0
+ eor tmp1, res1, in1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_xor_16_i1)
+
+
+ENTRY (libat_xor_fetch_16_i1) /* In: x0 = address, x2/x3 = mask, w4 = memory order.  Out: x0/x1 = value after the XOR.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ eor res0, res0, in0 /* Computed in place so the new value is what gets returned.  */
+ eor res1, res1, in1
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ eor res0, res0, in0
+ eor res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_xor_fetch_16_i1)
+
+
+ENTRY (libat_fetch_nand_16_i1) /* In: x0 = address, x2/x3 = mask, w4 = memory order.  Out: x0/x1 = value before the NAND.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ mvn in0, in0 /* Invert the mask once, outside the retry loop.  */
+ mvn in1, in1
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ orn tmp0, in0, res0 /* ~mask | ~old == ~(old & mask).  */
+ orn tmp1, in1, res1
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ orn tmp0, in0, res0
+ orn tmp1, in1, res1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_nand_16_i1)
+
+
+ENTRY (libat_nand_fetch_16_i1) /* In: x0 = address, x2/x3 = mask, w4 = memory order.  Out: x0/x1 = value after the NAND.  */
+ mov x5, x0 /* Keep the address in x5: the load below overwrites x0.  */
+ mvn in0, in0 /* Invert the mask once, outside the retry loop.  */
+ mvn in1, in1
+ cbnz w4, 2f
+1: /* RELAXED: plain exclusive load/store retry loop.  */
+ ldxp res0, res1, [x5]
+ orn res0, in0, res0 /* ~mask | ~old == ~(old & mask), computed in place.  */
+ orn res1, in1, res1
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status.  */
+ cbnz w4, 1b /* Non-zero status: the store did not succeed, retry.  */
+ ret
+2: /* Any other order: acquire load and release store.  */
+ ldaxp res0, res1, [x5]
+ orn res0, in0, res0
+ orn res1, in1, res1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_nand_fetch_16_i1)
+
+
+/* Atomic test-and-set on the byte at [x0].
+ In: x0 = address of the flag byte, w1 = memory order.
+ Out: w0 = previous value of the byte.
+ SWP takes the register to store first and the register that receives
+ the old memory contents second, so the constant 1 (w2) is the first
+ operand and w0 the second.  */
+ENTRY (libat_test_and_set_16_i1)
+ mov w2, 1 /* Value written to the flag.  */
+ cbnz w1, 2f
+ swpb w2, w0, [x0] /* RELAXED.  */
+ ret
+
+2: swpalb w2, w0, [x0] /* Any other order: acquire and release.  */
+ ret
+END (libat_test_and_set_16_i1)
+
@@ -22,14 +22,22 @@
<http://www.gnu.org/licenses/>. */
#if HAVE_IFUNC
-#include <stdlib.h>
+#include <sys/auxv.h>
-# ifdef HWCAP_ATOMICS
-# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
+#ifdef HWCAP_USCAT
+# if N == 16
+# define IFUNC_COND_1 (hwcap & HWCAP_USCAT)
# else
-# define IFUNC_COND_1 (false)
+# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
# endif
-# define IFUNC_NCOND(N) (1)
+#else
+# define IFUNC_COND_1 (false)
+#endif
+#define IFUNC_NCOND(N) (1)
+
+#if N == 16 && IFUNC_ALT != 0
+# define DONE 1
+#endif
#endif /* HAVE_IFUNC */
@@ -49,6 +49,7 @@ case "${target_cpu}" in
fi
;;
esac
+ XCFLAGS="${XCFLAGS} -mno-outline-atomics"
;;
arm*)
ARCH=arm