RISC-V: Use riscv_subword_address for atomic_test_and_set

Message ID 20231101050512.33961-1-patrick@rivosinc.com
State Unresolved
Headers
Series RISC-V: Use riscv_subword_address for atomic_test_and_set

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Patrick O'Neill Nov. 1, 2023, 5:05 a.m. UTC
  Other subword atomic patterns use riscv_subword_address to calculate
the aligned address, shift amount, mask and !mask. atomic_test_and_set
was implemented before the common function was added. After this patch
all subword atomic patterns use riscv_subword_address.

gcc/ChangeLog:

	* config/riscv/sync.md: Use riscv_subword_address function to
	calculate the address and shift in atomic_test_and_set.

Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
---
Tested using r14-5040-g5dc2ba333f8.

This patch causes this codegen to regress (adds a mv) but *only* on -O0.

extern void abort();

short x;

int main()
{
  if ( __atomic_test_and_set(&x, __ATOMIC_SEQ_CST))
    abort();
}

Baseline:

main:
        addi    sp,sp,-16
        sd      ra,8(sp)
        sd      s0,0(sp)
        addi    s0,sp,16
        lui     a5,%hi(x)
        addi    a5,a5,%lo(x)
        andi    a4,a5,-4
        andi    a5,a5,3
        li      a3,1
        slliw   a5,a5,3
        sllw    a2,a3,a5
        amoor.w.aqrl    a3,a2,0(a4)
        srlw    a5,a3,a5
        andi    a5,a5,0xff
        beq     a5,zero,.L2
        call    abort
.L2:
        li      a5,0
        mv      a0,a5
        ld      ra,8(sp)
        ld      s0,0(sp)
        addi    sp,sp,16
        jr      ra

After patch there is an additional mv:

main:
        addi    sp,sp,-16
        sd      ra,8(sp)
        sd      s0,0(sp)
        addi    s0,sp,16
        lui     a5,%hi(x)
        addi    a5,a5,%lo(x)
        andi    a3,a5,-4
        andi    a5,a5,3
        slliw   a5,a5,3
        li      a4,1
        sllw    a2,a4,a5
        amoor.w.aqrl    a4,a2,0(a3)
        sraw    a4,a4,a5
>       mv      a5,a4
        andi    a5,a5,0xff
        beq     a5,zero,.L2
        call    abort
.L2:
        li      a5,0
        mv      a0,a5
        ld      ra,8(sp)
        ld      s0,0(sp)
        addi    sp,sp,16
        jr      ra

This can be fixed using the first diff hunk shown under "Patch" below
(the change to the ASHIFTRT/lowpart move sequence):
--
2.34.1
  

Patch

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index ad4751febd2..a9539977321 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -530,10 +530,9 @@ 

   emit_insn (gen_atomic_fetch_orsi (old, aligned_mem, shifted_set, model));

-  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
-                                        gen_lowpart (QImode, shift)));
-
-  emit_move_insn (operands[0], gen_lowpart (QImode, old));
+  emit_move_insn (gen_lowpart (SImode, operands[0]),
+                 gen_rtx_ASHIFTRT (SImode, old,
+                                   gen_lowpart (QImode, shift)));

   DONE;
 })

But I think it hurts read/grokability of the .md sequence. If it's worth
changing for -O0 generated sequences, let me know and I'll send a follow
up patch.
---
 gcc/config/riscv/sync.md | 41 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 6ff3493b5ce..ad4751febd2 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -504,43 +504,36 @@ 
    (set (attr "length") (const_int 28))])

 (define_expand "atomic_test_and_set"
-  [(match_operand:QI 0 "register_operand" "")     ;; bool output
+  [(match_operand:QI 0 "register_operand" "")    ;; bool output
    (match_operand:QI 1 "memory_operand" "+A")    ;; memory
-   (match_operand:SI 2 "const_int_operand" "")]   ;; model
+   (match_operand:SI 2 "const_int_operand" "")]  ;; model
   "TARGET_ATOMIC"
 {
   /* We have no QImode atomics, so use the address LSBs to form a mask,
      then use an aligned SImode atomic.  */
-  rtx result = operands[0];
+  rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
   rtx model = operands[2];
-  rtx addr = force_reg (Pmode, XEXP (mem, 0));
-
-  rtx aligned_addr = gen_reg_rtx (Pmode);
-  emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, GEN_INT (-4)));
+  rtx set = gen_reg_rtx (QImode);
+  rtx aligned_mem = gen_reg_rtx (SImode);
+  rtx shift = gen_reg_rtx (SImode);

-  rtx aligned_mem = change_address (mem, SImode, aligned_addr);
-  set_mem_alias_set (aligned_mem, 0);
+  /* Unused.  */
+  rtx _mask = gen_reg_rtx (SImode);
+  rtx _not_mask = gen_reg_rtx (SImode);

-  rtx offset = gen_reg_rtx (SImode);
-  emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
-				       GEN_INT (3)));
+  riscv_subword_address (mem, &aligned_mem, &shift, &_mask, &_not_mask);

-  rtx tmp = gen_reg_rtx (SImode);
-  emit_move_insn (tmp, GEN_INT (1));
+  emit_move_insn (set, GEN_INT (1));
+  rtx shifted_set = gen_reg_rtx (SImode);
+  riscv_lshift_subword (QImode, set, shift, &shifted_set);

-  rtx shmt = gen_reg_rtx (SImode);
-  emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3)));
+  emit_insn (gen_atomic_fetch_orsi (old, aligned_mem, shifted_set, model));

-  rtx word = gen_reg_rtx (SImode);
-  emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp,
-					gen_lowpart (QImode, shmt)));
+  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
+					 gen_lowpart (QImode, shift)));

-  tmp = gen_reg_rtx (SImode);
-  emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model));
+  emit_move_insn (operands[0], gen_lowpart (QImode, old));

-  emit_move_insn (gen_lowpart (SImode, result),
-		  gen_rtx_LSHIFTRT (SImode, tmp,
-				    gen_lowpart (QImode, shmt)));
   DONE;
 })