[v2] RISC-V: Use riscv_subword_address for atomic_test_and_set

Message ID 20231101161439.41429-1-patrick@rivosinc.com
State Accepted
Series [v2] RISC-V: Use riscv_subword_address for atomic_test_and_set

Checks

Context                 Check     Description
snail/gcc-patch-check   success   Github commit url

Commit Message

Patrick O'Neill Nov. 1, 2023, 4:14 p.m. UTC
Other subword atomic patterns use riscv_subword_address to calculate
the aligned address, shift amount, mask and !mask. atomic_test_and_set
was implemented before the common function was added. After this patch
all subword atomic patterns use riscv_subword_address.

gcc/ChangeLog:

	* config/riscv/sync.md: Use riscv_subword_address function to
	calculate the address and shift in atomic_test_and_set.

Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
---
Changelog:
v2: Comment out the diff in the foreword so git doesn't get confused
when applying the patch
---
Tested using r14-5040-g5dc2ba333f8.

This patch causes the codegen below to regress (it adds a mv), but *only* at -O0.

extern void abort();

short x;

int main()
{
  if (__atomic_test_and_set (&x, __ATOMIC_SEQ_CST))
    abort();
}
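
Both before and after this patch, the expansion reduces to essentially
the same word-aligned read-modify-write. A rough C sketch for
illustration (the expander emits RTL, not C; RISC-V is little-endian;
the function name below is made up):

#include <stdint.h>

/* Illustrative only: mirrors the asm below, not GCC source.
   Returns nonzero if the byte was already set.  */
static int
test_and_set_byte_sketch (unsigned char *p)
{
  uintptr_t addr = (uintptr_t) p;
  uint32_t *aligned = (uint32_t *) (addr & ~(uintptr_t) 3); /* andi -4 */
  unsigned shift = (addr & 3) * 8;              /* andi 3; slliw 3 */
  uint32_t set = 1u << shift;                   /* li 1; sllw */
  uint32_t old = __atomic_fetch_or (aligned, set,
                                    __ATOMIC_SEQ_CST); /* amoor.w.aqrl */
  return (old >> shift) & 0xff;                 /* srlw/sraw; andi 0xff */
}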

Baseline:

main:
        addi    sp,sp,-16
        sd      ra,8(sp)
        sd      s0,0(sp)
        addi    s0,sp,16
        lui     a5,%hi(x)
        addi    a5,a5,%lo(x)
        andi    a4,a5,-4
        andi    a5,a5,3
        li      a3,1
        slliw   a5,a5,3
        sllw    a2,a3,a5
        amoor.w.aqrl    a3,a2,0(a4)
        srlw    a5,a3,a5
        andi    a5,a5,0xff
        beq     a5,zero,.L2
        call    abort
.L2:
        li      a5,0
        mv      a0,a5
        ld      ra,8(sp)
        ld      s0,0(sp)
        addi    sp,sp,16
        jr      ra

After the patch there is an additional mv:

main:
        addi    sp,sp,-16
        sd      ra,8(sp)
        sd      s0,0(sp)
        addi    s0,sp,16
        lui     a5,%hi(x)
        addi    a5,a5,%lo(x)
        andi    a3,a5,-4
        andi    a5,a5,3
        slliw   a5,a5,3
        li      a4,1
        sllw    a2,a4,a5
        amoor.w.aqrl    a4,a2,0(a3)
        sraw    a4,a4,a5
>       mv      a5,a4
        andi    a5,a5,0xff
        beq     a5,zero,.L2
        call    abort
.L2:
        li      a5,0
        mv      a0,a5
        ld      ra,8(sp)
        ld      s0,0(sp)
        addi    sp,sp,16
        jr      ra

This can be fixed using:
> diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
> index ad4751febd2..a9539977321 100644
> --- a/gcc/config/riscv/sync.md
> +++ b/gcc/config/riscv/sync.md
> @@ -530,10 +530,9 @@

>    emit_insn (gen_atomic_fetch_orsi (old, aligned_mem, shifted_set, model));

> -  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
> -                                        gen_lowpart (QImode, shift)));
> -
> -  emit_move_insn (operands[0], gen_lowpart (QImode, old));
> +  emit_move_insn (gen_lowpart (SImode, operands[0]),
> +                 gen_rtx_ASHIFTRT (SImode, old,
> +                                   gen_lowpart (QImode, shift)));

>    DONE;
>  })

But I think it hurts the readability of the .md sequence. If it's worth
changing for the sake of -O0-generated sequences, let me know and I'll
send a follow-up patch.
---
 gcc/config/riscv/sync.md | 41 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

--
2.34.1
  

Comments

Jeff Law Nov. 1, 2023, 7 p.m. UTC | #1
On 11/1/23 10:14, Patrick O'Neill wrote:
> Other subword atomic patterns use riscv_subword_address to calculate
> the aligned address, shift amount, mask and !mask. atomic_test_and_set
> was implemented before the common function was added. After this patch
> all subword atomic patterns use riscv_subword_address.
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/sync.md:  Use riscv_subword_address function to
> 	calculate the address and shift in atomic_test_and_set.
OK.  No strong opinions on the extraneous move at -O0.  It does make a 
tiny bit of work for the optimizers, but I doubt it matters in practice.

Your call if you want to fix that as a follow-up or not.

jeff
  
Patrick O'Neill Nov. 1, 2023, 10:08 p.m. UTC | #2
On 11/1/23 12:00, Jeff Law wrote:
>
>
> On 11/1/23 10:14, Patrick O'Neill wrote:
>> Other subword atomic patterns use riscv_subword_address to calculate
>> the aligned address, shift amount, mask and !mask. atomic_test_and_set
>> was implemented before the common function was added. After this patch
>> all subword atomic patterns use riscv_subword_address.
>>
>> gcc/ChangeLog:
>>
>>     * config/riscv/sync.md:  Use riscv_subword_address function to
>>     calculate the address and shift in atomic_test_and_set.
> OK.  No strong opinions on the extraneous move at -O0.  It does make a 
> tiny bit of work for the optimizers, but I doubt it matters in practice.
>
> Your call if you want to fix that as a follow-up or not.
>
> jeff
Committed.

I think I'll leave it as-is since IMO sync.md is more readable when 
those statements are broken up.
If someone finds issue with it in the future, we can easily apply the
diff from the patch.

Thanks,
Patrick
  

Patch

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 6ff3493b5ce..ad4751febd2 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -504,43 +504,36 @@ 
    (set (attr "length") (const_int 28))])

 (define_expand "atomic_test_and_set"
-  [(match_operand:QI 0 "register_operand" "")     ;; bool output
+  [(match_operand:QI 0 "register_operand" "")    ;; bool output
    (match_operand:QI 1 "memory_operand" "+A")    ;; memory
-   (match_operand:SI 2 "const_int_operand" "")]   ;; model
+   (match_operand:SI 2 "const_int_operand" "")]  ;; model
   "TARGET_ATOMIC"
 {
   /* We have no QImode atomics, so use the address LSBs to form a mask,
      then use an aligned SImode atomic.  */
-  rtx result = operands[0];
+  rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
   rtx model = operands[2];
-  rtx addr = force_reg (Pmode, XEXP (mem, 0));
-
-  rtx aligned_addr = gen_reg_rtx (Pmode);
-  emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, GEN_INT (-4)));
+  rtx set = gen_reg_rtx (QImode);
+  rtx aligned_mem = gen_reg_rtx (SImode);
+  rtx shift = gen_reg_rtx (SImode);

-  rtx aligned_mem = change_address (mem, SImode, aligned_addr);
-  set_mem_alias_set (aligned_mem, 0);
+  /* Unused.  */
+  rtx _mask = gen_reg_rtx (SImode);
+  rtx _not_mask = gen_reg_rtx (SImode);

-  rtx offset = gen_reg_rtx (SImode);
-  emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
-				       GEN_INT (3)));
+  riscv_subword_address (mem, &aligned_mem, &shift, &_mask, &_not_mask);

-  rtx tmp = gen_reg_rtx (SImode);
-  emit_move_insn (tmp, GEN_INT (1));
+  emit_move_insn (set, GEN_INT (1));
+  rtx shifted_set = gen_reg_rtx (SImode);
+  riscv_lshift_subword (QImode, set, shift, &shifted_set);

-  rtx shmt = gen_reg_rtx (SImode);
-  emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3)));
+  emit_insn (gen_atomic_fetch_orsi (old, aligned_mem, shifted_set, model));

-  rtx word = gen_reg_rtx (SImode);
-  emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp,
-					gen_lowpart (QImode, shmt)));
+  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
+					 gen_lowpart (QImode, shift)));

-  tmp = gen_reg_rtx (SImode);
-  emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model));
+  emit_move_insn (operands[0], gen_lowpart (QImode, old));

-  emit_move_insn (gen_lowpart (SImode, result),
-		  gen_rtx_LSHIFTRT (SImode, tmp,
-				    gen_lowpart (QImode, shmt)));
   DONE;
 })
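
For readers unfamiliar with the helpers used above: riscv_subword_address
and riscv_lshift_subword build RTL, but the values they compute can be
modeled roughly in C as below (illustrative only, not the backend
implementation). atomic_test_and_set only needs the aligned address and
the shift, which is why the _mask/_not_mask outputs are marked unused in
the patch.

#include <stdint.h>

/* Rough model of riscv_subword_address's outputs for a QImode access.  */
static void
model_subword_address (uintptr_t addr, uintptr_t *aligned_addr,
                       uint32_t *shift, uint32_t *mask,
                       uint32_t *not_mask)
{
  *aligned_addr = addr & ~(uintptr_t) 3; /* containing SImode word */
  *shift = (addr & 3) * 8;    /* byte's bit offset (little endian) */
  *mask = (uint32_t) 0xff << *shift;     /* selects the byte */
  *not_mask = ~*mask;                    /* clears the byte */
}

/* Rough model of riscv_lshift_subword: move the subword value into
   position within the aligned word.  */
static uint32_t
model_lshift_subword (uint8_t value, uint32_t shift)
{
  return (uint32_t) value << shift;
}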