[v2,1/2] RISC-V: Use bseti/bclri/binvi to extend reach of ori/andi/xori
Checks
Commit Message
Sequences of the form "a | C" and "a ^ C" with C being the positive
half of a signed immediate's range with one extra bit set in addition
are mapped to ori/xori and one bseti/binvi to avoid using a temporary
(and a multi-insn sequence to load C into that temporary).
Something similar holds for "a & ~C" being representable as either
bclri + bclri or bclri + andi.
gcc/ChangeLog:
* config/riscv/bitmanip.md (*<or_optab>i<mode>_extrabit):
New pattern for binvi+binvi/xori and bseti+bseti/ori.
(*andi<mode>_extrabit): New pattern for bclri+bclri/andi.
* config/riscv/iterators.md (any_or): Match ior and xor.
(or_optab): New code attribute.
* config/riscv/predicates.md (const_twobits_operand):
New predicate.
(uimm_extra_bit_operand): New predicate.
(uimm_extra_bit_or_twobits): New predicate.
(not_uimm_extra_bit_operand): New predicate.
(not_uimm_extra_bit_or_nottwobits): New predicate.
* config/riscv/riscv.h (UIMM_EXTRA_BIT_OPERAND):
Helper for the uimm_extra_bit_operand and
not_uimm_extra_bit_operand predicates.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/zbs-bclri-02.c: New test.
* gcc.target/riscv/zbs-binvi.c: New test.
* gcc.target/riscv/zbs-bseti.c: New test.
Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
---
- This no longer depends on "RISC-V: Optimize branches testing a
bit-range or a shifted immediate". The other series now needs to be
adjusted before merging.
Changes in v2:
- Collects already approved changes for v2 for (a | C) and (a ^ C).
- Pulls in the (already) approved branch on polarity-reversed bits
for v2, as it shares predicates with the other changes.
- Newly adds support for the (a & ~C) case.
- Use an iterator for the ori/xori case and share one pattern
- Adds the andi (a & ~C) case, expanding to bclri/andi.
- Cleans up the predicates (incl. removing the non-intuitive inclusion
of two-bits-set under the uimm_extra_bits)
gcc/config/riscv/bitmanip.md | 37 +++++++++++++++++++
gcc/config/riscv/iterators.md | 8 ++++
gcc/config/riscv/predicates.md | 28 ++++++++++++++
gcc/config/riscv/riscv.h | 8 ++++
.../riscv/{zbs-bclri.c => zbs-bclri-01.c} | 0
gcc/testsuite/gcc.target/riscv/zbs-bclri-02.c | 27 ++++++++++++++
gcc/testsuite/gcc.target/riscv/zbs-binvi.c | 22 +++++++++++
gcc/testsuite/gcc.target/riscv/zbs-bseti.c | 27 ++++++++++++++
8 files changed, 157 insertions(+)
rename gcc/testsuite/gcc.target/riscv/{zbs-bclri.c => zbs-bclri-01.c} (100%)
create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-bclri-02.c
create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-binvi.c
create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-bseti.c
@@ -480,3 +480,40 @@ (define_split
"TARGET_ZBS"
[(set (match_dup 0) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 2)))
(set (match_dup 0) (plus:GPR (match_dup 0) (const_int -1)))])
+
+;; Catch those cases where we can use ori/xori + bseti/binvi or two
+;; bseti/binvi insns instead of a lui + addi + or/xor sequence.
+(define_insn_and_split "*<or_optab>i<mode>_extrabit"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (any_or:X (match_operand:X 1 "register_operand" "r")
+ (match_operand:X 2 "uimm_extra_bit_or_twobits" "i")))]
+ "TARGET_ZBS"
+ "#" ; no assembly of its own: always split once reload has completed
+ "&& reload_completed"
+ [(set (match_dup 0) (<or_optab>:X (match_dup 1) (match_dup 3)))
+ (set (match_dup 0) (<or_optab>:X (match_dup 0) (match_dup 4)))]
+{
+ unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]);
+ unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
+
+ operands[3] = GEN_INT (bits &~ topbit); /* Constant minus its top set bit. */
+ operands[4] = GEN_INT (topbit); /* The top set bit on its own. */
+})
+
+;; Likewise for and: use andi + bclri or bclri + bclri.
+(define_insn_and_split "*andi<mode>_extrabit"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (and:X (match_operand:X 1 "register_operand" "r")
+ (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i")))]
+ "TARGET_ZBS"
+ "#" ; no assembly of its own: always split once reload has completed
+ "&& reload_completed"
+ [(set (match_dup 0) (and:X (match_dup 1) (match_dup 3)))
+ (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))]
+{
+ unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]);
+ unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (~bits);
+
+ operands[3] = GEN_INT (bits | topbit); /* Mask with its top cleared bit set. */
+ operands[4] = GEN_INT (~topbit); /* Mask that clears only that bit. */
+})
@@ -136,6 +136,10 @@ (define_code_iterator any_shift [ashift ashiftrt lshiftrt])
;; from the same template.
(define_code_iterator any_bitwise [and ior xor])
+;; This code iterator covers only ior and xor (any_bitwise without and),
+;; so that both can be generated from the same template.
+(define_code_iterator any_or [ior xor])
+
;; This code iterator allows unsigned and signed division to be generated
;; from the same template.
(define_code_iterator any_div [div udiv mod umod])
@@ -194,6 +198,10 @@ (define_code_attr optab [(ashift "ashl")
(plus "add")
(minus "sub")])
+;; <or_optab> expands to the name of an any_or code, for pattern names and splits.
+(define_code_attr or_optab [(ior "ior")
+ (xor "xor")])
+
;; <insn> expands to the name of the insn that implements a particular code.
(define_code_attr insn [(ashift "sll")
(ashiftrt "sra")
@@ -290,3 +290,31 @@ (define_predicate "vector_merge_operand"
(define_predicate "const_nottwobits_operand"
(and (match_code "const_int")
(match_test "popcount_hwi (~UINTVAL (op)) == 2")))
+
+;; A CONST_INT operand whose unsigned value has exactly two bits set.
+(define_predicate "const_twobits_operand"
+ (and (match_code "const_int")
+ (match_test "popcount_hwi (UINTVAL (op)) == 2")))
+
+;; A CONST_INT operand that is not a small operand itself, but becomes
+;; one after its most significant set bit has been cleared.
+(define_predicate "uimm_extra_bit_operand"
+ (and (match_code "const_int")
+ (match_test "UIMM_EXTRA_BIT_OPERAND (UINTVAL (op))")))
+
+(define_predicate "uimm_extra_bit_or_twobits" ; extra-bit or two-bit constant
+ (and (match_code "const_int")
+ (ior (match_operand 0 "uimm_extra_bit_operand")
+ (match_operand 0 "const_twobits_operand"))))
+
+;; A CONST_INT operand whose bitwise negation is a
+;; uimm_extra_bit_operand, i.e., a mask that turns into a negated
+;; small operand once its most significant cleared bit has been set.
+(define_predicate "not_uimm_extra_bit_operand"
+ (and (match_code "const_int")
+ (match_test "UIMM_EXTRA_BIT_OPERAND (~UINTVAL (op))")))
+
+(define_predicate "not_uimm_extra_bit_or_nottwobits" ; negated forms of the above
+ (and (match_code "const_int")
+ (ior (match_operand 0 "not_uimm_extra_bit_operand")
+ (match_operand 0 "const_nottwobits_operand"))))
@@ -593,6 +593,14 @@ enum reg_class
? (VALUE) \
: ((VALUE) & ((HOST_WIDE_INT_1U << 32)-1))))
+/* True if VALUE is not a SMALL_OPERAND as it stands (that alone would
+ hold for every small operand and needs no extra bit), but becomes
+ one once its most significant set bit is cleared. VALUE is expanded
+ more than once, so it must be free of side effects. */
+#define UIMM_EXTRA_BIT_OPERAND(VALUE) \
+ (!SMALL_OPERAND (VALUE) \
+ && SMALL_OPERAND ((VALUE) & ~(HOST_WIDE_INT_1U << floor_log2 (VALUE))))
+
/* Stack layout; function entry, exit and calling. */
#define STACK_GROWS_DOWNWARD 1
similarity index 100%
rename from gcc/testsuite/gcc.target/riscv/zbs-bclri.c
rename to gcc/testsuite/gcc.target/riscv/zbs-bclri-01.c
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f3(long long a)
+{
+ return a & ~0x1100; /* Clears bits 8 and 12: andi + bclri expected. */
+}
+
+long long f4 (long long a)
+{
+ return a & ~0x80000000000000ffull; /* Low byte and bit 63: andi + bclri. */
+}
+
+long long f5 (long long a)
+{
+ return a & ~0x8000001000000000ull; /* Bits 36 and 63: bclri + bclri. */
+}
+
+long long f6 (long long a)
+{
+ return a & ~0xff7ffffffffffffull; /* NOTE(review): matches neither new predicate; presumably a negative case -- confirm. */
+}
+
+/* { dg-final { scan-assembler-times "bclri\t" 4 } } */
+/* { dg-final { scan-assembler-times "andi\t" 2 } } */
+
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f3(long long a)
+{
+ return a ^ 0x1100; /* Bits 8 and 12: xori + binvi expected. */
+}
+
+long long f4 (long long a)
+{
+ return a ^ 0x80000000000000ffull; /* Low byte and bit 63: xori + binvi. */
+}
+
+long long f5 (long long a)
+{
+ return a ^ 0x8000001000000000ull; /* Bits 36 and 63: binvi + binvi. */
+}
+
+/* { dg-final { scan-assembler-times "binvi\t" 4 } } */
+/* { dg-final { scan-assembler-times "xori\t" 2 } } */
+
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long foo1 (long long a)
+{
+ return a | 0x1100; /* Bits 8 and 12: ori + bseti expected. */
+}
+
+long long foo2 (long long a)
+{
+ return a | 0x80000000000000ffull; /* Low byte and bit 63: ori + bseti. */
+}
+
+long long foo3 (long long a)
+{
+ return a | 0x8000000100000000ull; /* Bits 32 and 63: bseti + bseti. */
+}
+
+long long foo4 (long long a)
+{
+ return a | 0xfff; /* 0x7ff plus bit 11: ori + bseti. */
+}
+
+/* { dg-final { scan-assembler-times "bseti\t" 5 } } */
+/* { dg-final { scan-assembler-times "ori\t" 3 } } */
+