On 11/1/22 11:25, Kevin Lee wrote:
> This is the updated patch of
> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601824.html. Since
> the riscv-selftest.cc has been added, this version of the patch adds the
> logic in riscv-selftest.cc to also consider parallel insns.
> The patch has been tested with rv64imafdc / rv64imac / rv32imafdc /
> rv32imac and no additional failures were detected in the testsuite.
>
> gcc/ChangeLog:
> Jim Wilson<jim.wilson.gcc@gmail.com>
> Michael Collison<collison@rivosinc.com>
> Kevin Lee<kevinl@rivosinc.com>
> * config/riscv/predicates.md (const_lui_operand): New Predicate.
> (add_operand): Ditto.
> (reg_or_const_int_operand): Ditto.
> * config/riscv/riscv-protos.h (riscv_eliminable_reg): New
> function.
> * config/riscv/riscv-selftests.cc (calculate_x_in_sequence):
> Consider Parallel insns.
> * config/riscv/riscv.cc (riscv_eliminable_reg): New function.
> (riscv_adjust_libcall_cfi_prologue): Use gen_rtx_SET and
> gen_rtx_fmt_ee instead of gen_add3_insn.
> (riscv_adjust_libcall_cfi_epilogue): Ditto.
> * config/riscv/riscv.md (addsi3): Remove.
> (add<mode>3): New instruction for large stack frame
> optimization.
> (add<mode>3_internal): Ditto.
> (adddi3): Remove.
> (add<mode>3_internal2): New instruction for insns generated in
> the prologue and epilogue pass.
It looks like your mailer completely messed up the formatting of the
patch. Please resend it as a plaintext attachment. It's basically
unreadable as-is.
I went back and looked at the original thread, for the saxpy example,
the patch made a notable improvement in the setup code, but actually
lengthened the loop by one instruction, though it has eliminated two
memory loads in the loop, replacing them with arithmetic, which is
probably a win.
The loop still seems a bit odd which may point to further improvements
that could be made to this patch. Consider this fragment of the loop:
addi a3,sp,-864
sh2add a3,a5,a3
flw fa5,864(a3)
Note the +-864. Don't those just cancel out?
Jeff
@@ -35,6 +35,14 @@
(ior (match_operand 0 "arith_operand")
(match_operand 0 "lui_operand")))
+(define_predicate "const_lui_operand"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0xFFF) == 0 && INTVAL (op) != 0")))
+
+(define_predicate "add_operand"
+ (ior (match_operand 0 "arith_operand")
+ (match_operand 0 "const_lui_operand")))
+
(define_predicate "const_csr_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 31)")))
@@ -59,6 +67,11 @@
(ior (match_operand 0 "const_0_operand")
(match_operand 0 "register_operand")))
+;; For use in adds, when adding to an eliminable register.
+(define_predicate "reg_or_const_int_operand"
+ (ior (match_code "const_int")
+ (match_operand 0 "register_operand")))
+
;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
(define_predicate "branch_on_bit_operand"
(and (match_code "const_int")
b/gcc/config/riscv/riscv-protos.h
@@ -63,6 +63,7 @@ extern void riscv_expand_conditional_move (rtx, rtx, rtx,
rtx_code, rtx, rtx);
extern rtx riscv_legitimize_call_address (rtx);
extern void riscv_set_return_address (rtx, rtx);
extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_eliminable_reg (rtx);
extern rtx riscv_return_addr (int, rtx);
extern poly_int64 riscv_initial_elimination_offset (int, int);
extern void riscv_expand_prologue (void);
b/gcc/config/riscv/riscv-selftests.cc
@@ -116,6 +116,9 @@ calculate_x_in_sequence (rtx reg)
rtx pat = PATTERN (insn);
rtx dest = SET_DEST (pat);
+ if (GET_CODE (pat) == PARALLEL)
+ dest = SET_DEST (XVECEXP (pat, 0, 0));
+
if (GET_CODE (pat) == CLOBBER)
continue;
@@ -4686,6 +4686,16 @@ riscv_initial_elimination_offset (int from, int to)
return src - dest;
}
+/* Return true if X is a register that will be eliminated later on. */
+bool
+riscv_eliminable_reg (rtx x)
+{
+ return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
+ || REGNO (x) == ARG_POINTER_REGNUM
+ || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
+ && REGNO (x) <= LAST_VIRTUAL_REGISTER));
+}
+
/* Implement RETURN_ADDR_RTX. We do not support moving back to a
previous frame. */
@@ -4887,8 +4897,9 @@ riscv_adjust_libcall_cfi_prologue ()
}
/* Debug info for adjust sp. */
- adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
- stack_pointer_rtx, GEN_INT (-saved_size));
+ adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
+ stack_pointer_rtx, GEN_INT (saved_size)));
dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
dwarf);
return dwarf;
@@ -4990,8 +5001,9 @@ riscv_adjust_libcall_cfi_epilogue ()
int saved_size = cfun->machine->frame.save_libcall_adjustment;
/* Debug info for adjust sp. */
- adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
- stack_pointer_rtx, GEN_INT (saved_size));
+ adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
+ stack_pointer_rtx, GEN_INT (saved_size)));
dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
dwarf);
@@ -446,23 +446,80 @@
[(set_attr "type" "fadd")
(set_attr "mode" "<UNITMODE>")])
-(define_insn "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (plus:SI (match_operand:SI 1 "register_operand" " r,r")
- (match_operand:SI 2 "arith_operand" " r,I")))]
+(define_expand "add<mode>3"
+ [(parallel
+ [(set (match_operand:GPR 0 "register_operand" "")
+ (plus:GPR (match_operand:GPR 1 "register_operand" "")
+ (match_operand:GPR 2 "add_operand" "")))
+ (clobber (match_scratch:GPR 3 ""))])]
""
- "add%i2%~\t%0,%1,%2"
+{
+ if (riscv_eliminable_reg (operands[1]))
+ {
+ if (splittable_const_int_operand (operands[2], <MODE>mode))
+ {
+ /* The idea here is that we emit
+ add op0, op1, %hi(op2)
+ addi op0, op0, %lo(op2)
+ Then when op1, the eliminable reg, gets replaced with sp+offset,
+ we can simplify the constants. */
+ HOST_WIDE_INT high_part = CONST_HIGH_PART (INTVAL (operands[2]));
+ emit_insn (gen_add<mode>3_internal (operands[0], operands[1],
+ GEN_INT (high_part)));
+ operands[1] = operands[0];
+ operands[2] = GEN_INT (INTVAL (operands[2]) - high_part);
+ }
+ else if (! const_arith_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ }
+})
+
+(define_insn_and_split "add<mode>3_internal"
+ [(set (match_operand:GPR 0 "register_operand" "=r,r,&r,!&r")
+ (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r,r,0")
+ (match_operand:GPR 2 "add_operand" " r,I,L,L")))
+ (clobber (match_scratch:GPR 3 "=X,X,X,&r"))]
+ ""
+{
+ if ((which_alternative == 2) || (which_alternative == 3))
+ return "#";
+
+ if (TARGET_64BIT && <MODE>mode == SImode)
+ return "add%i2w\t%0,%1,%2";
+ else
+ return "add%i2\t%0,%1,%2";
+}
+ "&& reload_completed && const_lui_operand (operands[2], <MODE>mode)"
+ [(const_int 0)]
+{
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ {
+ emit_insn (gen_mov<mode> (operands[0], operands[2]));
+ emit_insn (gen_add<mode>3_internal (operands[0], operands[0],
operands[1]));
+ }
+ else
+ {
+ emit_insn (gen_mov<mode> (operands[3], operands[2]));
+ emit_insn (gen_add<mode>3_internal (operands[0], operands[0],
operands[3]));
+ }
+ DONE;
+}
[(set_attr "type" "arith")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "adddi3"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (plus:DI (match_operand:DI 1 "register_operand" " r,r")
- (match_operand:DI 2 "arith_operand" " r,I")))]
- "TARGET_64BIT"
- "add%i2\t%0,%1,%2"
+(define_insn "add<mode>3_internal2"
+ [(set (match_operand:GPR 0 "register_operand" "=r,r")
+ (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r")
+ (match_operand:GPR 2 "arith_operand" " r,I")))]
+ ""
+ {
+ if (TARGET_64BIT && <MODE>mode == SImode)
+ return "add%i2w\t%0,%1,%2";
+ else
+ return "add%i2\t%0,%1,%2";
+ }
[(set_attr "type" "arith")
- (set_attr "mode" "DI")])
+ (set_attr "mode" "<MODE>")])
(define_expand "addv<mode>4"
[(set (match_operand:GPR 0 "register_operand" "=r,r")
@@ -508,6 +565,7 @@