libatomic: Fix SEQ_CST 128-bit atomic load [PR108891]
Checks
Commit Message
The LSE2 ifunc for 16-byte atomic load requires a barrier before the LDP -
without it, it effectively has Load-AcquirePC semantics similar to LDAPR,
which is less restrictive than what __ATOMIC_SEQ_CST requires. This patch
fixes this and adds comments to make it easier to see which sequence is
used for each case. Use a load/store exclusive loop for store to simplify
testing memory ordering is correct (it is slightly faster too).
Passes regress, OK for commit?
libatomic/
PR libgcc/108891
config/linux/aarch64/atomic_16.S: Fix libat_load_16_i1.
Add comments describing the memory order.
---
Comments
ping
From: Wilco Dijkstra
Sent: 23 February 2023 15:11
To: GCC Patches <gcc-patches@gcc.gnu.org>
Cc: Richard Sandiford <Richard.Sandiford@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
Subject: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891]
The LSE2 ifunc for 16-byte atomic load requires a barrier before the LDP -
without it, it effectively has Load-AcquirePC semantics similar to LDAPR,
which is less restrictive than what __ATOMIC_SEQ_CST requires. This patch
fixes this and adds comments to make it easier to see which sequence is
used for each case. Use a load/store exclusive loop for store to simplify
testing memory ordering is correct (it is slightly faster too).
Passes regress, OK for commit?
libatomic/
PR libgcc/108891
config/linux/aarch64/atomic_16.S: Fix libat_load_16_i1.
Add comments describing the memory order.
---
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index 732c3534a06678664a252bdbc53652eeab0af506..05439ce394b9653c9bcb582761ff7aaa7c8f9643 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -72,33 +72,38 @@ name: \
ENTRY (libat_load_16_i1)
cbnz w1, 1f
+
+ /* RELAXED. */
ldp res0, res1, [x0]
ret
1:
- cmp w1, ACQUIRE
- b.hi 2f
+ cmp w1, SEQ_CST
+ b.eq 2f
+
+ /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
ldp res0, res1, [x0]
dmb ishld
ret
-2:
+
+ /* SEQ_CST. */
+2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
ldp res0, res1, [x0]
- dmb ish
+ dmb ishld
ret
END (libat_load_16_i1)
ENTRY (libat_store_16_i1)
cbnz w4, 1f
+
+ /* RELAXED. */
stp in0, in1, [x0]
ret
-1:
- dmb ish
- stp in0, in1, [x0]
- cmp w4, SEQ_CST
- beq 2f
- ret
-2:
- dmb ish
+
+ /* RELEASE/SEQ_CST. */
+1: ldaxp xzr, tmp0, [x0]
+ stlxp w4, in0, in1, [x0]
+ cbnz w4, 1b
ret
END (libat_store_16_i1)
@@ -106,29 +111,33 @@ END (libat_store_16_i1)
ENTRY (libat_exchange_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
stxp w4, in0, in1, [x5]
cbnz w4, 1b
ret
2:
cmp w4, ACQUIRE
b.hi 4f
-3:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME. */
+3: ldaxp res0, res1, [x5]
stxp w4, in0, in1, [x5]
cbnz w4, 3b
ret
4:
cmp w4, RELEASE
b.ne 6f
-5:
- ldxp res0, res1, [x5]
+
+ /* RELEASE. */
+5: ldxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 5b
ret
-6:
- ldaxp res0, res1, [x5]
+
+ /* ACQ_REL/SEQ_CST. */
+6: ldaxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 6b
ret
@@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1)
cbz w4, 2f
cmp w4, RELEASE
b.hs 3f
+
+ /* ACQUIRE/CONSUME. */
caspa exp0, exp1, in0, in1, [x0]
0:
cmp exp0, tmp0
@@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1)
stp exp0, exp1, [x1]
mov x0, 0
ret
-2:
- casp exp0, exp1, in0, in1, [x0]
+
+ /* RELAXED. */
+2: casp exp0, exp1, in0, in1, [x0]
b 0b
-3:
- b.hi 4f
+
+ /* RELEASE. */
+3: b.hi 4f
caspl exp0, exp1, in0, in1, [x0]
b 0b
-4:
- caspal exp0, exp1, in0, in1, [x0]
+
+ /* ACQ_REL/SEQ_CST. */
+4: caspal exp0, exp1, in0, in1, [x0]
b 0b
END (libat_compare_exchange_16_i1)
@@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1)
ENTRY (libat_fetch_add_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
adds tmplo, reslo, inlo
adc tmphi, reshi, inhi
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
adds tmplo, reslo, inlo
adc tmphi, reshi, inhi
stlxp w4, tmp0, tmp1, [x5]
@@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1)
ENTRY (libat_add_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
adds reslo, reslo, inlo
adc reshi, reshi, inhi
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
adds reslo, reslo, inlo
adc reshi, reshi, inhi
stlxp w4, res0, res1, [x5]
@@ -209,15 +227,17 @@ END (libat_add_fetch_16_i1)
ENTRY (libat_fetch_sub_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
subs tmplo, reslo, inlo
sbc tmphi, reshi, inhi
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
subs tmplo, reslo, inlo
sbc tmphi, reshi, inhi
stlxp w4, tmp0, tmp1, [x5]
@@ -229,15 +249,17 @@ END (libat_fetch_sub_16_i1)
ENTRY (libat_sub_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
subs reslo, reslo, inlo
sbc reshi, reshi, inhi
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
subs reslo, reslo, inlo
sbc reshi, reshi, inhi
stlxp w4, res0, res1, [x5]
@@ -249,15 +271,17 @@ END (libat_sub_fetch_16_i1)
ENTRY (libat_fetch_or_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orr tmp0, res0, in0
orr tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orr tmp0, res0, in0
orr tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -269,15 +293,17 @@ END (libat_fetch_or_16_i1)
ENTRY (libat_or_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orr res0, res0, in0
orr res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orr res0, res0, in0
orr res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -289,15 +315,17 @@ END (libat_or_fetch_16_i1)
ENTRY (libat_fetch_and_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
and tmp0, res0, in0
and tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
and tmp0, res0, in0
and tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -309,15 +337,17 @@ END (libat_fetch_and_16_i1)
ENTRY (libat_and_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
and res0, res0, in0
and res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
and res0, res0, in0
and res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -329,15 +359,17 @@ END (libat_and_fetch_16_i1)
ENTRY (libat_fetch_xor_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
eor tmp0, res0, in0
eor tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
eor tmp0, res0, in0
eor tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -349,15 +381,17 @@ END (libat_fetch_xor_16_i1)
ENTRY (libat_xor_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
eor res0, res0, in0
eor res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
eor res0, res0, in0
eor res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -371,15 +405,17 @@ ENTRY (libat_fetch_nand_16_i1)
mvn in0, in0
mvn in1, in1
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orn tmp0, in0, res0
orn tmp1, in1, res1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orn tmp0, in0, res0
orn tmp1, in1, res1
stlxp w4, tmp0, tmp1, [x5]
@@ -393,15 +429,17 @@ ENTRY (libat_nand_fetch_16_i1)
mvn in0, in0
mvn in1, in1
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orn res0, in0, res0
orn res1, in1, res1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orn res0, in0, res0
orn res1, in1, res1
stlxp w4, res0, res1, [x5]
@@ -413,9 +451,12 @@ END (libat_nand_fetch_16_i1)
ENTRY (libat_test_and_set_16_i1)
mov w2, 1
cbnz w1, 2f
+
+ /* RELAXED. */
swpb w0, w2, [x0]
ret
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: swpalb w0, w2, [x0]
ret
END (libat_test_and_set_16_i1)
> -----Original Message-----
> From: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
> Sent: Thursday, March 16, 2023 6:22 PM
> To: GCC Patches <gcc-patches@gcc.gnu.org>
> Cc: Richard Sandiford <Richard.Sandiford@arm.com>; Kyrylo Tkachov
> <Kyrylo.Tkachov@arm.com>
> Subject: Re: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891]
>
> ping
>
>
> From: Wilco Dijkstra
> Sent: 23 February 2023 15:11
> To: GCC Patches <gcc-patches@gcc.gnu.org>
> Cc: Richard Sandiford <Richard.Sandiford@arm.com>; Kyrylo Tkachov
> <Kyrylo.Tkachov@arm.com>
> Subject: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891]
>
>
> The LSE2 ifunc for 16-byte atomic load requires a barrier before the LDP -
> without it, it effectively has Load-AcquirePC semantics similar to LDAPR,
> which is less restrictive than what __ATOMIC_SEQ_CST requires. This patch
> fixes this and adds comments to make it easier to see which sequence is
> used for each case. Use a load/store exclusive loop for store to simplify
> testing memory ordering is correct (it is slightly faster too).
>
> Passes regress, OK for commit?
>
Ok.
Thanks,
Kyrill
> libatomic/
> PR libgcc/108891
> config/linux/aarch64/atomic_16.S: Fix libat_load_16_i1.
> Add comments describing the memory order.
>
> ---
>
> diff --git a/libatomic/config/linux/aarch64/atomic_16.S
> b/libatomic/config/linux/aarch64/atomic_16.S
> index
> 732c3534a06678664a252bdbc53652eeab0af506..05439ce394b9653c9bcb582
> 761ff7aaa7c8f9643 100644
> --- a/libatomic/config/linux/aarch64/atomic_16.S
> +++ b/libatomic/config/linux/aarch64/atomic_16.S
> @@ -72,33 +72,38 @@ name: \
>
> ENTRY (libat_load_16_i1)
> cbnz w1, 1f
> +
> + /* RELAXED. */
> ldp res0, res1, [x0]
> ret
> 1:
> - cmp w1, ACQUIRE
> - b.hi 2f
> + cmp w1, SEQ_CST
> + b.eq 2f
> +
> + /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
> ldp res0, res1, [x0]
> dmb ishld
> ret
> -2:
> +
> + /* SEQ_CST. */
> +2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
> ldp res0, res1, [x0]
> - dmb ish
> + dmb ishld
> ret
> END (libat_load_16_i1)
>
>
> ENTRY (libat_store_16_i1)
> cbnz w4, 1f
> +
> + /* RELAXED. */
> stp in0, in1, [x0]
> ret
> -1:
> - dmb ish
> - stp in0, in1, [x0]
> - cmp w4, SEQ_CST
> - beq 2f
> - ret
> -2:
> - dmb ish
> +
> + /* RELEASE/SEQ_CST. */
> +1: ldaxp xzr, tmp0, [x0]
> + stlxp w4, in0, in1, [x0]
> + cbnz w4, 1b
> ret
> END (libat_store_16_i1)
>
> @@ -106,29 +111,33 @@ END (libat_store_16_i1)
> ENTRY (libat_exchange_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> stxp w4, in0, in1, [x5]
> cbnz w4, 1b
> ret
> 2:
> cmp w4, ACQUIRE
> b.hi 4f
> -3:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME. */
> +3: ldaxp res0, res1, [x5]
> stxp w4, in0, in1, [x5]
> cbnz w4, 3b
> ret
> 4:
> cmp w4, RELEASE
> b.ne 6f
> -5:
> - ldxp res0, res1, [x5]
> +
> + /* RELEASE. */
> +5: ldxp res0, res1, [x5]
> stlxp w4, in0, in1, [x5]
> cbnz w4, 5b
> ret
> -6:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQ_REL/SEQ_CST. */
> +6: ldaxp res0, res1, [x5]
> stlxp w4, in0, in1, [x5]
> cbnz w4, 6b
> ret
> @@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1)
> cbz w4, 2f
> cmp w4, RELEASE
> b.hs 3f
> +
> + /* ACQUIRE/CONSUME. */
> caspa exp0, exp1, in0, in1, [x0]
> 0:
> cmp exp0, tmp0
> @@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1)
> stp exp0, exp1, [x1]
> mov x0, 0
> ret
> -2:
> - casp exp0, exp1, in0, in1, [x0]
> +
> + /* RELAXED. */
> +2: casp exp0, exp1, in0, in1, [x0]
> b 0b
> -3:
> - b.hi 4f
> +
> + /* RELEASE. */
> +3: b.hi 4f
> caspl exp0, exp1, in0, in1, [x0]
> b 0b
> -4:
> - caspal exp0, exp1, in0, in1, [x0]
> +
> + /* ACQ_REL/SEQ_CST. */
> +4: caspal exp0, exp1, in0, in1, [x0]
> b 0b
> END (libat_compare_exchange_16_i1)
>
> @@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1)
> ENTRY (libat_fetch_add_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> adds tmplo, reslo, inlo
> adc tmphi, reshi, inhi
> stxp w4, tmp0, tmp1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> adds tmplo, reslo, inlo
> adc tmphi, reshi, inhi
> stlxp w4, tmp0, tmp1, [x5]
> @@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1)
> ENTRY (libat_add_fetch_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> adds reslo, reslo, inlo
> adc reshi, reshi, inhi
> stxp w4, res0, res1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> adds reslo, reslo, inlo
> adc reshi, reshi, inhi
> stlxp w4, res0, res1, [x5]
> @@ -209,15 +227,17 @@ END (libat_add_fetch_16_i1)
> ENTRY (libat_fetch_sub_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> subs tmplo, reslo, inlo
> sbc tmphi, reshi, inhi
> stxp w4, tmp0, tmp1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> subs tmplo, reslo, inlo
> sbc tmphi, reshi, inhi
> stlxp w4, tmp0, tmp1, [x5]
> @@ -229,15 +249,17 @@ END (libat_fetch_sub_16_i1)
> ENTRY (libat_sub_fetch_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> subs reslo, reslo, inlo
> sbc reshi, reshi, inhi
> stxp w4, res0, res1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> subs reslo, reslo, inlo
> sbc reshi, reshi, inhi
> stlxp w4, res0, res1, [x5]
> @@ -249,15 +271,17 @@ END (libat_sub_fetch_16_i1)
> ENTRY (libat_fetch_or_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> orr tmp0, res0, in0
> orr tmp1, res1, in1
> stxp w4, tmp0, tmp1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> orr tmp0, res0, in0
> orr tmp1, res1, in1
> stlxp w4, tmp0, tmp1, [x5]
> @@ -269,15 +293,17 @@ END (libat_fetch_or_16_i1)
> ENTRY (libat_or_fetch_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> orr res0, res0, in0
> orr res1, res1, in1
> stxp w4, res0, res1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> orr res0, res0, in0
> orr res1, res1, in1
> stlxp w4, res0, res1, [x5]
> @@ -289,15 +315,17 @@ END (libat_or_fetch_16_i1)
> ENTRY (libat_fetch_and_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> and tmp0, res0, in0
> and tmp1, res1, in1
> stxp w4, tmp0, tmp1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> and tmp0, res0, in0
> and tmp1, res1, in1
> stlxp w4, tmp0, tmp1, [x5]
> @@ -309,15 +337,17 @@ END (libat_fetch_and_16_i1)
> ENTRY (libat_and_fetch_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> and res0, res0, in0
> and res1, res1, in1
> stxp w4, res0, res1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> and res0, res0, in0
> and res1, res1, in1
> stlxp w4, res0, res1, [x5]
> @@ -329,15 +359,17 @@ END (libat_and_fetch_16_i1)
> ENTRY (libat_fetch_xor_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> eor tmp0, res0, in0
> eor tmp1, res1, in1
> stxp w4, tmp0, tmp1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> eor tmp0, res0, in0
> eor tmp1, res1, in1
> stlxp w4, tmp0, tmp1, [x5]
> @@ -349,15 +381,17 @@ END (libat_fetch_xor_16_i1)
> ENTRY (libat_xor_fetch_16_i1)
> mov x5, x0
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> eor res0, res0, in0
> eor res1, res1, in1
> stxp w4, res0, res1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> eor res0, res0, in0
> eor res1, res1, in1
> stlxp w4, res0, res1, [x5]
> @@ -371,15 +405,17 @@ ENTRY (libat_fetch_nand_16_i1)
> mvn in0, in0
> mvn in1, in1
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> orn tmp0, in0, res0
> orn tmp1, in1, res1
> stxp w4, tmp0, tmp1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> orn tmp0, in0, res0
> orn tmp1, in1, res1
> stlxp w4, tmp0, tmp1, [x5]
> @@ -393,15 +429,17 @@ ENTRY (libat_nand_fetch_16_i1)
> mvn in0, in0
> mvn in1, in1
> cbnz w4, 2f
> -1:
> - ldxp res0, res1, [x5]
> +
> + /* RELAXED. */
> +1: ldxp res0, res1, [x5]
> orn res0, in0, res0
> orn res1, in1, res1
> stxp w4, res0, res1, [x5]
> cbnz w4, 1b
> ret
> -2:
> - ldaxp res0, res1, [x5]
> +
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> +2: ldaxp res0, res1, [x5]
> orn res0, in0, res0
> orn res1, in1, res1
> stlxp w4, res0, res1, [x5]
> @@ -413,9 +451,12 @@ END (libat_nand_fetch_16_i1)
> ENTRY (libat_test_and_set_16_i1)
> mov w2, 1
> cbnz w1, 2f
> +
> + /* RELAXED. */
> swpb w0, w2, [x0]
> ret
>
> + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
> 2: swpalb w0, w2, [x0]
> ret
> END (libat_test_and_set_16_i1)
@@ -72,33 +72,38 @@ name: \
ENTRY (libat_load_16_i1)
cbnz w1, 1f
+
+ /* RELAXED. */
ldp res0, res1, [x0]
ret
1:
- cmp w1, ACQUIRE
- b.hi 2f
+ cmp w1, SEQ_CST
+ b.eq 2f
+
+ /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
ldp res0, res1, [x0]
dmb ishld
ret
-2:
+
+ /* SEQ_CST. */
+2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
ldp res0, res1, [x0]
- dmb ish
+ dmb ishld
ret
END (libat_load_16_i1)
ENTRY (libat_store_16_i1)
cbnz w4, 1f
+
+ /* RELAXED. */
stp in0, in1, [x0]
ret
-1:
- dmb ish
- stp in0, in1, [x0]
- cmp w4, SEQ_CST
- beq 2f
- ret
-2:
- dmb ish
+
+ /* RELEASE/SEQ_CST. */
+1: ldaxp xzr, tmp0, [x0]
+ stlxp w4, in0, in1, [x0]
+ cbnz w4, 1b
ret
END (libat_store_16_i1)
@@ -106,29 +111,33 @@ END (libat_store_16_i1)
ENTRY (libat_exchange_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
stxp w4, in0, in1, [x5]
cbnz w4, 1b
ret
2:
cmp w4, ACQUIRE
b.hi 4f
-3:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME. */
+3: ldaxp res0, res1, [x5]
stxp w4, in0, in1, [x5]
cbnz w4, 3b
ret
4:
cmp w4, RELEASE
b.ne 6f
-5:
- ldxp res0, res1, [x5]
+
+ /* RELEASE. */
+5: ldxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 5b
ret
-6:
- ldaxp res0, res1, [x5]
+
+ /* ACQ_REL/SEQ_CST. */
+6: ldaxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 6b
ret
@@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1)
cbz w4, 2f
cmp w4, RELEASE
b.hs 3f
+
+ /* ACQUIRE/CONSUME. */
caspa exp0, exp1, in0, in1, [x0]
0:
cmp exp0, tmp0
@@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1)
stp exp0, exp1, [x1]
mov x0, 0
ret
-2:
- casp exp0, exp1, in0, in1, [x0]
+
+ /* RELAXED. */
+2: casp exp0, exp1, in0, in1, [x0]
b 0b
-3:
- b.hi 4f
+
+ /* RELEASE. */
+3: b.hi 4f
caspl exp0, exp1, in0, in1, [x0]
b 0b
-4:
- caspal exp0, exp1, in0, in1, [x0]
+
+ /* ACQ_REL/SEQ_CST. */
+4: caspal exp0, exp1, in0, in1, [x0]
b 0b
END (libat_compare_exchange_16_i1)
@@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1)
ENTRY (libat_fetch_add_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
adds tmplo, reslo, inlo
adc tmphi, reshi, inhi
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
adds tmplo, reslo, inlo
adc tmphi, reshi, inhi
stlxp w4, tmp0, tmp1, [x5]
@@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1)
ENTRY (libat_add_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
adds reslo, reslo, inlo
adc reshi, reshi, inhi
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
adds reslo, reslo, inlo
adc reshi, reshi, inhi
stlxp w4, res0, res1, [x5]
@@ -209,15 +227,17 @@ END (libat_add_fetch_16_i1)
ENTRY (libat_fetch_sub_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
subs tmplo, reslo, inlo
sbc tmphi, reshi, inhi
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
subs tmplo, reslo, inlo
sbc tmphi, reshi, inhi
stlxp w4, tmp0, tmp1, [x5]
@@ -229,15 +249,17 @@ END (libat_fetch_sub_16_i1)
ENTRY (libat_sub_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
subs reslo, reslo, inlo
sbc reshi, reshi, inhi
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
subs reslo, reslo, inlo
sbc reshi, reshi, inhi
stlxp w4, res0, res1, [x5]
@@ -249,15 +271,17 @@ END (libat_sub_fetch_16_i1)
ENTRY (libat_fetch_or_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orr tmp0, res0, in0
orr tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orr tmp0, res0, in0
orr tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -269,15 +293,17 @@ END (libat_fetch_or_16_i1)
ENTRY (libat_or_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orr res0, res0, in0
orr res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orr res0, res0, in0
orr res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -289,15 +315,17 @@ END (libat_or_fetch_16_i1)
ENTRY (libat_fetch_and_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
and tmp0, res0, in0
and tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
and tmp0, res0, in0
and tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -309,15 +337,17 @@ END (libat_fetch_and_16_i1)
ENTRY (libat_and_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
and res0, res0, in0
and res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
and res0, res0, in0
and res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -329,15 +359,17 @@ END (libat_and_fetch_16_i1)
ENTRY (libat_fetch_xor_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
eor tmp0, res0, in0
eor tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
eor tmp0, res0, in0
eor tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -349,15 +381,17 @@ END (libat_fetch_xor_16_i1)
ENTRY (libat_xor_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
eor res0, res0, in0
eor res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
eor res0, res0, in0
eor res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -371,15 +405,17 @@ ENTRY (libat_fetch_nand_16_i1)
mvn in0, in0
mvn in1, in1
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orn tmp0, in0, res0
orn tmp1, in1, res1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orn tmp0, in0, res0
orn tmp1, in1, res1
stlxp w4, tmp0, tmp1, [x5]
@@ -393,15 +429,17 @@ ENTRY (libat_nand_fetch_16_i1)
mvn in0, in0
mvn in1, in1
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orn res0, in0, res0
orn res1, in1, res1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orn res0, in0, res0
orn res1, in1, res1
stlxp w4, res0, res1, [x5]
@@ -413,9 +451,12 @@ END (libat_nand_fetch_16_i1)
ENTRY (libat_test_and_set_16_i1)
mov w2, 1
cbnz w1, 2f
+
+ /* RELAXED. */
swpb w0, w2, [x0]
ret
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: swpalb w0, w2, [x0]
ret
END (libat_test_and_set_16_i1)