RISC-V: Add vectorized strcmp.
Checks
Commit Message
Hi,
this patch adds a vectorized strcmp implementation and tests. As with
strlen, expansion is still guarded by -minline-strcmp. I just realized
that I forgot to send this as a series: this patch depends on the NFC
patch and the rawmemchr fix posted before it.
Regards
Robin
gcc/ChangeLog:
* config/riscv/riscv-protos.h (expand_strcmp): Declare.
* config/riscv/riscv-string.cc (riscv_expand_strcmp): Add
strategy handling and delegation to scalar and vector expanders.
(expand_strcmp): Vectorized implementation.
* config/riscv/riscv.md: Add TARGET_VECTOR to strcmp expander.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-string.cc | 161 +++++++++++++++++-
gcc/config/riscv/riscv.md | 3 +-
.../riscv/rvv/autovec/builtin/strcmp-run.c | 32 ++++
.../riscv/rvv/autovec/builtin/strcmp.c | 13 ++
5 files changed, 206 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
Comments
lgtm
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-12-01 23:23
To: gcc-patches; palmer; Kito Cheng; jeffreyalaw; juzhe.zhong@rivai.ai
CC: rdapp.gcc
Subject: [PATCH] RISC-V: Add vectorized strcmp.
Hi,
this patch adds a vectorized strcmp implementation and tests. As with
strlen, expansion is still guarded by -minline-strcmp. I just realized
that I forgot to send this as a series: this patch depends on the NFC
patch and the rawmemchr fix posted before it.
Regards
Robin
gcc/ChangeLog:
* config/riscv/riscv-protos.h (expand_strcmp): Declare.
* config/riscv/riscv-string.cc (riscv_expand_strcmp): Add
strategy handling and delegation to scalar and vector expanders.
(expand_strcmp): Vectorized implementation.
* config/riscv/riscv.md: Add TARGET_VECTOR to strcmp expander.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-string.cc | 161 +++++++++++++++++-
gcc/config/riscv/riscv.md | 3 +-
.../riscv/rvv/autovec/builtin/strcmp-run.c | 32 ++++
.../riscv/rvv/autovec/builtin/strcmp.c | 13 ++
5 files changed, 206 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c94c82a9973..5878a674413 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -558,6 +558,7 @@ void expand_cond_binop (unsigned, rtx *);
void expand_cond_ternop (unsigned, rtx *);
void expand_popcount (rtx *);
void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
+bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, poly_int64);
/* Rounding mode bitfield for fixed point VXRM. */
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 6cde1bf89a0..11c1f74d0b3 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -511,12 +511,19 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2,
return false;
alignment = UINTVAL (align_rtx);
- if (TARGET_ZBB || TARGET_XTHEADBB)
+ if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR)
{
- return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
- ncompare);
+ bool ok = riscv_vector::expand_strcmp (result, src1, src2,
+ bytes_rtx, alignment,
+ ncompare);
+ if (ok)
+ return true;
}
+ if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR)
+ return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
+ ncompare);
+
return false;
}
@@ -1092,4 +1099,152 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle,
}
}
+/* Implement cmpstr<mode> using vector instructions. The ALIGNMENT and
+ NCOMPARE parameters are unused for now. */
+
+bool
+expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
+ unsigned HOST_WIDE_INT, bool)
+{
+ gcc_assert (TARGET_VECTOR);
+
+ /* We don't support big endian. */
+ if (BYTES_BIG_ENDIAN)
+ return false;
+
+ bool with_length = nbytes != NULL_RTX;
+
+ if (with_length
+ && (!REG_P (nbytes) && !SUBREG_P (nbytes) && !CONST_INT_P (nbytes)))
+ return false;
+
+ if (with_length && CONST_INT_P (nbytes))
+ nbytes = force_reg (Pmode, nbytes);
+
+ machine_mode mode = E_QImode;
+ unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
+ int lmul = TARGET_MAX_LMUL;
+ poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
+
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (GET_MODE_INNER (mode), nunits)
+ .exists (&vmode))
+ gcc_unreachable ();
+
+ machine_mode mask_mode = riscv_vector::get_mask_mode (vmode);
+
+ /* Prepare addresses. */
+ rtx src_addr1 = copy_addr_to_reg (XEXP (src1, 0));
+ rtx vsrc1 = change_address (src1, vmode, src_addr1);
+
+ rtx src_addr2 = copy_addr_to_reg (XEXP (src2, 0));
+ rtx vsrc2 = change_address (src2, vmode, src_addr2);
+
+ /* Set initial pointer bump to 0. */
+ rtx cnt = gen_reg_rtx (Pmode);
+ emit_move_insn (cnt, CONST0_RTX (Pmode));
+
+ rtx sub = gen_reg_rtx (Pmode);
+ emit_move_insn (sub, CONST0_RTX (Pmode));
+
+ /* Create source vectors. */
+ rtx vec1 = gen_reg_rtx (vmode);
+ rtx vec2 = gen_reg_rtx (vmode);
+
+ rtx done = gen_label_rtx ();
+ rtx loop = gen_label_rtx ();
+ emit_label (loop);
+
+ /* Bump the pointers. */
+ emit_insn (gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, cnt)));
+ emit_insn (gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, cnt)));
+
+ rtx vlops1[] = {vec1, vsrc1};
+ rtx vlops2[] = {vec2, vsrc2};
+
+ if (!with_length)
+ {
+ emit_vlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops1);
+
+ emit_vlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops2);
+ }
+ else
+ {
+ nbytes = gen_lowpart (Pmode, nbytes);
+ emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops1, nbytes);
+
+ emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops2, nbytes);
+ }
+
+ /* Read the vl for the next pointer bump. */
+ if (Pmode == SImode)
+ emit_insn (gen_read_vlsi (cnt));
+ else
+ emit_insn (gen_read_vldi_zero_extend (cnt));
+
+ if (with_length)
+ {
+ rtx test_done = gen_rtx_EQ (VOIDmode, cnt, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test_done, cnt, const0_rtx, done));
+ emit_insn (gen_rtx_SET (nbytes, gen_rtx_MINUS (Pmode, nbytes, cnt)));
+ }
+
+ /* Look for a \0 in the first string. */
+ rtx mask0 = gen_reg_rtx (mask_mode);
+ rtx eq0
+ = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, CONST0_RTX (mode)),
+ vec1);
+ rtx vmsops1[] = {mask0, eq0, vec1, CONST0_RTX (mode)};
+ emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode),
+ riscv_vector::COMPARE_OP, vmsops1, cnt);
+
+ /* Look for vec1 != vec2 (includes vec2[i] == 0). */
+ rtx maskne = gen_reg_rtx (mask_mode);
+ rtx ne = gen_rtx_NE (mask_mode, vec1, vec2);
+ rtx vmsops[] = {maskne, ne, vec1, vec2};
+ emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP,
+ vmsops, cnt);
+
+ /* Combine both masks into one. */
+ rtx mask = gen_reg_rtx (mask_mode);
+ rtx vmorops[] = {mask, mask0, maskne};
+ emit_nonvlmax_insn (code_for_pred (IOR, mask_mode),
+ riscv_vector::BINARY_MASK_OP, vmorops, cnt);
+
+ /* Find the first bit in the mask (the first unequal element). */
+ rtx found_at = gen_reg_rtx (Pmode);
+ rtx vfops[] = {found_at, mask};
+ emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode),
+ riscv_vector::CPOP_OP, vfops, cnt);
+
+ /* Emit the loop condition. */
+ rtx test = gen_rtx_LT (VOIDmode, found_at, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test, found_at, const0_rtx, loop));
+
+ /* Walk up to the difference point. */
+ emit_insn (
+ gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, found_at)));
+ emit_insn (
+ gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, found_at)));
+
+ /* Load the respective byte and compute the difference. */
+ rtx c1 = gen_reg_rtx (Pmode);
+ rtx c2 = gen_reg_rtx (Pmode);
+
+ do_load_from_addr (mode, c1, src_addr1, src1);
+ do_load_from_addr (mode, c2, src_addr2, src2);
+
+ do_sub3 (sub, c1, c2);
+
+ if (with_length)
+ emit_label (done);
+
+ emit_insn (gen_movsi (result, gen_lowpart (SImode, sub)));
+ return true;
+}
+
}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 54015eed57c..b805b1723b8 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3726,7 +3726,8 @@ (define_expand "cmpstrsi"
(compare:SI (match_operand:BLK 1)
(match_operand:BLK 2)))
(use (match_operand:SI 3))])]
- "riscv_inline_strcmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+ "riscv_inline_strcmp && !optimize_size
+ && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
{
if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
NULL_RTX, operands[3]))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
new file mode 100644
index 00000000000..6dec7da91c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+#include <string.h>
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strcmp (s, t);
+}
+
+int
+__attribute__ ((noipa, optimize ("0")))
+foo2 (const char *s, const char *t)
+{
+ return strcmp (s, t);
+}
+
+#define SZ 10
+
+int main ()
+{
+ const char *s[SZ]
+ = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+ "a", "z", "1", "9", "12345678901234567889012345678901234567890"};
+
+ for (int i = 0; i < SZ; i++)
+ for (int j = 0; j < SZ; j++)
+ if (foo (s[i], s[j]) != foo2 (s[i], s[j]))
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
new file mode 100644
index 00000000000..f9d33a74fc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strcmp (s, t);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 2 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
+/* { dg-final { scan-assembler-times "vmor.m" 1 } } */
--
2.43.0
Similar to strlen, this now seems safe to push. Will do so
later.
I tested on rv64gcv_zvl128b with -minline-strlen and didn't see
regressions.
Regards
Robin
Ah, I forgot to attach the current v2 that also enables strncmp.
It was additionally tested with -minline-strncmp on rv64gcv.
Regards
Robin
Subject: [PATCH v2] RISC-V: Add vectorized strcmp and strncmp.
This patch adds vectorized strcmp and strncmp implementations and
tests. Similar to strlen, expansion is still guarded by
-minline-str(n)cmp.
gcc/ChangeLog:
PR target/112109
* config/riscv/riscv-protos.h (expand_strcmp): Declare.
* config/riscv/riscv-string.cc (riscv_expand_strcmp): Add
strategy handling and delegation to scalar and vector expanders.
(expand_strcmp): Vectorized implementation.
* config/riscv/riscv.md: Add TARGET_VECTOR to strcmp and strncmp
expanders.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strncmp.c: New test.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-string.cc | 161 +++++++++++++++++-
gcc/config/riscv/riscv.md | 6 +-
.../riscv/rvv/autovec/builtin/strcmp-run.c | 32 ++++
.../riscv/rvv/autovec/builtin/strcmp.c | 13 ++
.../riscv/rvv/autovec/builtin/strncmp-run.c | 136 +++++++++++++++
.../riscv/rvv/autovec/builtin/strncmp.c | 13 ++
7 files changed, 357 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c7b5789a4b3..20bbb5b859c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -558,6 +558,7 @@ void expand_cond_binop (unsigned, rtx *);
void expand_cond_ternop (unsigned, rtx *);
void expand_popcount (rtx *);
void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
+bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, poly_int64);
/* Rounding mode bitfield for fixed point VXRM. */
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 6cde1bf89a0..11c1f74d0b3 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -511,12 +511,19 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2,
return false;
alignment = UINTVAL (align_rtx);
- if (TARGET_ZBB || TARGET_XTHEADBB)
+ if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR)
{
- return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
- ncompare);
+ bool ok = riscv_vector::expand_strcmp (result, src1, src2,
+ bytes_rtx, alignment,
+ ncompare);
+ if (ok)
+ return true;
}
+ if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR)
+ return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
+ ncompare);
+
return false;
}
@@ -1092,4 +1099,152 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle,
}
}
+/* Implement cmpstr<mode> using vector instructions. The ALIGNMENT and
+ NCOMPARE parameters are unused for now. */
+
+bool
+expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
+ unsigned HOST_WIDE_INT, bool)
+{
+ gcc_assert (TARGET_VECTOR);
+
+ /* We don't support big endian. */
+ if (BYTES_BIG_ENDIAN)
+ return false;
+
+ bool with_length = nbytes != NULL_RTX;
+
+ if (with_length
+ && (!REG_P (nbytes) && !SUBREG_P (nbytes) && !CONST_INT_P (nbytes)))
+ return false;
+
+ if (with_length && CONST_INT_P (nbytes))
+ nbytes = force_reg (Pmode, nbytes);
+
+ machine_mode mode = E_QImode;
+ unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
+ int lmul = TARGET_MAX_LMUL;
+ poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
+
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (GET_MODE_INNER (mode), nunits)
+ .exists (&vmode))
+ gcc_unreachable ();
+
+ machine_mode mask_mode = riscv_vector::get_mask_mode (vmode);
+
+ /* Prepare addresses. */
+ rtx src_addr1 = copy_addr_to_reg (XEXP (src1, 0));
+ rtx vsrc1 = change_address (src1, vmode, src_addr1);
+
+ rtx src_addr2 = copy_addr_to_reg (XEXP (src2, 0));
+ rtx vsrc2 = change_address (src2, vmode, src_addr2);
+
+ /* Set initial pointer bump to 0. */
+ rtx cnt = gen_reg_rtx (Pmode);
+ emit_move_insn (cnt, CONST0_RTX (Pmode));
+
+ rtx sub = gen_reg_rtx (Pmode);
+ emit_move_insn (sub, CONST0_RTX (Pmode));
+
+ /* Create source vectors. */
+ rtx vec1 = gen_reg_rtx (vmode);
+ rtx vec2 = gen_reg_rtx (vmode);
+
+ rtx done = gen_label_rtx ();
+ rtx loop = gen_label_rtx ();
+ emit_label (loop);
+
+ /* Bump the pointers. */
+ emit_insn (gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, cnt)));
+ emit_insn (gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, cnt)));
+
+ rtx vlops1[] = {vec1, vsrc1};
+ rtx vlops2[] = {vec2, vsrc2};
+
+ if (!with_length)
+ {
+ emit_vlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops1);
+
+ emit_vlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops2);
+ }
+ else
+ {
+ nbytes = gen_lowpart (Pmode, nbytes);
+ emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops1, nbytes);
+
+ emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops2, nbytes);
+ }
+
+ /* Read the vl for the next pointer bump. */
+ if (Pmode == SImode)
+ emit_insn (gen_read_vlsi (cnt));
+ else
+ emit_insn (gen_read_vldi_zero_extend (cnt));
+
+ if (with_length)
+ {
+ rtx test_done = gen_rtx_EQ (VOIDmode, cnt, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test_done, cnt, const0_rtx, done));
+ emit_insn (gen_rtx_SET (nbytes, gen_rtx_MINUS (Pmode, nbytes, cnt)));
+ }
+
+ /* Look for a \0 in the first string. */
+ rtx mask0 = gen_reg_rtx (mask_mode);
+ rtx eq0
+ = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, CONST0_RTX (mode)),
+ vec1);
+ rtx vmsops1[] = {mask0, eq0, vec1, CONST0_RTX (mode)};
+ emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode),
+ riscv_vector::COMPARE_OP, vmsops1, cnt);
+
+ /* Look for vec1 != vec2 (includes vec2[i] == 0). */
+ rtx maskne = gen_reg_rtx (mask_mode);
+ rtx ne = gen_rtx_NE (mask_mode, vec1, vec2);
+ rtx vmsops[] = {maskne, ne, vec1, vec2};
+ emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP,
+ vmsops, cnt);
+
+ /* Combine both masks into one. */
+ rtx mask = gen_reg_rtx (mask_mode);
+ rtx vmorops[] = {mask, mask0, maskne};
+ emit_nonvlmax_insn (code_for_pred (IOR, mask_mode),
+ riscv_vector::BINARY_MASK_OP, vmorops, cnt);
+
+ /* Find the first bit in the mask (the first unequal element). */
+ rtx found_at = gen_reg_rtx (Pmode);
+ rtx vfops[] = {found_at, mask};
+ emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode),
+ riscv_vector::CPOP_OP, vfops, cnt);
+
+ /* Emit the loop condition. */
+ rtx test = gen_rtx_LT (VOIDmode, found_at, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test, found_at, const0_rtx, loop));
+
+ /* Walk up to the difference point. */
+ emit_insn (
+ gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, found_at)));
+ emit_insn (
+ gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, found_at)));
+
+ /* Load the respective byte and compute the difference. */
+ rtx c1 = gen_reg_rtx (Pmode);
+ rtx c2 = gen_reg_rtx (Pmode);
+
+ do_load_from_addr (mode, c1, src_addr1, src1);
+ do_load_from_addr (mode, c2, src_addr2, src2);
+
+ do_sub3 (sub, c1, c2);
+
+ if (with_length)
+ emit_label (done);
+
+ emit_insn (gen_movsi (result, gen_lowpart (SImode, sub)));
+ return true;
+}
+
}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6f9dec8c152..eed997116b0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3702,7 +3702,8 @@ (define_expand "cmpstrnsi"
(match_operand:BLK 2)))
(use (match_operand:SI 3))
(use (match_operand:SI 4))])]
- "riscv_inline_strncmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+ "riscv_inline_strncmp && !optimize_size
+ && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
{
if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
operands[3], operands[4]))
@@ -3722,7 +3723,8 @@ (define_expand "cmpstrsi"
(compare:SI (match_operand:BLK 1)
(match_operand:BLK 2)))
(use (match_operand:SI 3))])]
- "riscv_inline_strcmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+ "riscv_inline_strcmp && !optimize_size
+ && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
{
if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
NULL_RTX, operands[3]))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
new file mode 100644
index 00000000000..6dec7da91c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+#include <string.h>
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strcmp (s, t);
+}
+
+int
+__attribute__ ((noipa, optimize ("0")))
+foo2 (const char *s, const char *t)
+{
+ return strcmp (s, t);
+}
+
+#define SZ 10
+
+int main ()
+{
+ const char *s[SZ]
+ = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+ "a", "z", "1", "9", "12345678901234567889012345678901234567890"};
+
+ for (int i = 0; i < SZ; i++)
+ for (int j = 0; j < SZ; j++)
+ if (foo (s[i], s[j]) != foo2 (s[i], s[j]))
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
new file mode 100644
index 00000000000..f9d33a74fc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strcmp (s, t);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 2 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
+/* { dg-final { scan-assembler-times "vmor.m" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
new file mode 100644
index 00000000000..8d1471a3a13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
@@ -0,0 +1,136 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strncmp" } */
+
+#include <string.h>
+
+int
+__attribute__ ((noipa, optimize ("0")))
+foo2 (const char *s, const char *t, int n)
+{
+ return strncmp (s, t, n);
+}
+
+#define SZ 11
+
+#define TEST(I, J, N) \
+ int res_##I_##J_##N = __builtin_strncmp (s[I], s[J], N); \
+ int ref_##I_##J_##N = foo2 (s[I], s[J], N); \
+ if (res_##I_##J_##N != ref_##I_##J_##N) \
+ __builtin_abort ();
+
+int main ()
+{
+ const char *s[SZ]
+ = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+ "a", "z", "1", "9", "12345678901234567889012345678901234567890",
+ "ds0fi0349r0sdmfvi0sjf0c9fj034mrx903cw0efmc9jfsicn2390crrm0i90msdfi0sdf0"};
+
+ for (int i = 0; i < SZ; i++)
+ for (int j = 0; j < SZ; j++)
+ {
+ TEST(i, j, 0)
+ TEST(i, j, 1)
+ TEST(i, j, 2)
+ TEST(i, j, 3)
+ TEST(i, j, 4)
+ TEST(i, j, 5)
+ TEST(i, j, 6)
+ TEST(i, j, 7)
+ TEST(i, j, 8)
+ TEST(i, j, 9)
+ TEST(i, j, 10)
+ TEST(i, j, 11)
+ TEST(i, j, 12)
+ TEST(i, j, 13)
+ TEST(i, j, 14)
+ TEST(i, j, 15)
+ TEST(i, j, 16)
+ TEST(i, j, 17)
+ TEST(i, j, 18)
+ TEST(i, j, 19)
+ TEST(i, j, 20)
+ TEST(i, j, 21)
+ TEST(i, j, 22)
+ TEST(i, j, 23)
+ TEST(i, j, 24)
+ TEST(i, j, 25)
+ TEST(i, j, 26)
+ TEST(i, j, 27)
+ TEST(i, j, 28)
+ TEST(i, j, 29)
+ TEST(i, j, 30)
+ TEST(i, j, 31)
+ TEST(i, j, 32)
+ TEST(i, j, 33)
+ TEST(i, j, 34)
+ TEST(i, j, 35)
+ TEST(i, j, 36)
+ TEST(i, j, 37)
+ TEST(i, j, 38)
+ TEST(i, j, 39)
+ TEST(i, j, 40)
+ TEST(i, j, 41)
+ TEST(i, j, 42)
+ TEST(i, j, 43)
+ TEST(i, j, 44)
+ TEST(i, j, 45)
+ TEST(i, j, 46)
+ TEST(i, j, 47)
+ TEST(i, j, 48)
+ TEST(i, j, 49)
+ TEST(i, j, 50)
+ TEST(i, j, 51)
+ TEST(i, j, 52)
+ TEST(i, j, 53)
+ TEST(i, j, 54)
+ TEST(i, j, 55)
+ TEST(i, j, 56)
+ TEST(i, j, 57)
+ TEST(i, j, 58)
+ TEST(i, j, 59)
+ TEST(i, j, 60)
+ TEST(i, j, 61)
+ TEST(i, j, 62)
+ TEST(i, j, 63)
+ TEST(i, j, 64)
+ TEST(i, j, 65)
+ TEST(i, j, 66)
+ TEST(i, j, 67)
+ TEST(i, j, 68)
+ TEST(i, j, 69)
+ TEST(i, j, 70)
+ TEST(i, j, 71)
+ TEST(i, j, 72)
+ TEST(i, j, 73)
+ TEST(i, j, 74)
+ TEST(i, j, 75)
+ TEST(i, j, 76)
+ TEST(i, j, 77)
+ TEST(i, j, 78)
+ TEST(i, j, 79)
+ TEST(i, j, 80)
+ TEST(i, j, 81)
+ TEST(i, j, 82)
+ TEST(i, j, 83)
+ TEST(i, j, 84)
+ TEST(i, j, 85)
+ TEST(i, j, 86)
+ TEST(i, j, 87)
+ TEST(i, j, 88)
+ TEST(i, j, 89)
+ TEST(i, j, 90)
+ TEST(i, j, 91)
+ TEST(i, j, 92)
+ TEST(i, j, 93)
+ TEST(i, j, 94)
+ TEST(i, j, 95)
+ TEST(i, j, 96)
+ TEST(i, j, 97)
+ TEST(i, j, 98)
+ TEST(i, j, 99)
+ TEST(i, j, 100)
+ TEST(i, j, 101)
+ TEST(i, j, 102)
+ TEST(i, j, 103)
+ }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c
new file mode 100644
index 00000000000..a89633ea9d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strncmp" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strncmp (s, t, 7);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 2 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
+/* { dg-final { scan-assembler-times "vmor.m" 1 } } */
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-12-09 00:25
To: gcc-patches; palmer; kito.cheng; Jeff Law; 钟居哲
CC: rdapp.gcc
Subject: Re: [PATCH] RISC-V: Add vectorized strcmp.
Ah, I forgot to attach the current v2 that also enables strncmp.
It was additionally tested with -minline-strncmp on rv64gcv.
Regards
Robin
Subject: [PATCH v2] RISC-V: Add vectorized strcmp and strncmp.
This patch adds vectorized strcmp and strncmp implementations and
tests. Similar to strlen, expansion is still guarded by
-minline-str(n)cmp.
gcc/ChangeLog:
PR target/112109
* config/riscv/riscv-protos.h (expand_strcmp): Declare.
* config/riscv/riscv-string.cc (riscv_expand_strcmp): Add
strategy handling and delegation to scalar and vector expanders.
(expand_strcmp): Vectorized implementation.
* config/riscv/riscv.md: Add TARGET_VECTOR to strcmp and strncmp
expanders.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strncmp.c: New test.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-string.cc | 161 +++++++++++++++++-
gcc/config/riscv/riscv.md | 6 +-
.../riscv/rvv/autovec/builtin/strcmp-run.c | 32 ++++
.../riscv/rvv/autovec/builtin/strcmp.c | 13 ++
.../riscv/rvv/autovec/builtin/strncmp-run.c | 136 +++++++++++++++
.../riscv/rvv/autovec/builtin/strncmp.c | 13 ++
7 files changed, 357 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c7b5789a4b3..20bbb5b859c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -558,6 +558,7 @@ void expand_cond_binop (unsigned, rtx *);
void expand_cond_ternop (unsigned, rtx *);
void expand_popcount (rtx *);
void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
+bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, poly_int64);
/* Rounding mode bitfield for fixed point VXRM. */
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 6cde1bf89a0..11c1f74d0b3 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -511,12 +511,19 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2,
return false;
alignment = UINTVAL (align_rtx);
- if (TARGET_ZBB || TARGET_XTHEADBB)
+ if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR)
{
- return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
- ncompare);
+ bool ok = riscv_vector::expand_strcmp (result, src1, src2,
+ bytes_rtx, alignment,
+ ncompare);
+ if (ok)
+ return true;
}
+ if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR)
+ return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
+ ncompare);
+
return false;
}
@@ -1092,4 +1099,152 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle,
}
}
+/* Implement cmpstr<mode> using vector instructions. The ALIGNMENT and
+ NCOMPARE parameters are unused for now. */
+
+bool
+expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
+ unsigned HOST_WIDE_INT, bool)
+{
+ gcc_assert (TARGET_VECTOR);
+
+ /* We don't support big endian. */
+ if (BYTES_BIG_ENDIAN)
+ return false;
+
+ bool with_length = nbytes != NULL_RTX;
+
+ if (with_length
+ && (!REG_P (nbytes) && !SUBREG_P (nbytes) && !CONST_INT_P (nbytes)))
+ return false;
+
+ if (with_length && CONST_INT_P (nbytes))
+ nbytes = force_reg (Pmode, nbytes);
+
+ machine_mode mode = E_QImode;
+ unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
+ int lmul = TARGET_MAX_LMUL;
+ poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
+
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (GET_MODE_INNER (mode), nunits)
+ .exists (&vmode))
+ gcc_unreachable ();
+
+ machine_mode mask_mode = riscv_vector::get_mask_mode (vmode);
+
+ /* Prepare addresses. */
+ rtx src_addr1 = copy_addr_to_reg (XEXP (src1, 0));
+ rtx vsrc1 = change_address (src1, vmode, src_addr1);
+
+ rtx src_addr2 = copy_addr_to_reg (XEXP (src2, 0));
+ rtx vsrc2 = change_address (src2, vmode, src_addr2);
+
+ /* Set initial pointer bump to 0. */
+ rtx cnt = gen_reg_rtx (Pmode);
+ emit_move_insn (cnt, CONST0_RTX (Pmode));
+
+ rtx sub = gen_reg_rtx (Pmode);
+ emit_move_insn (sub, CONST0_RTX (Pmode));
+
+ /* Create source vectors. */
+ rtx vec1 = gen_reg_rtx (vmode);
+ rtx vec2 = gen_reg_rtx (vmode);
+
+ rtx done = gen_label_rtx ();
+ rtx loop = gen_label_rtx ();
+ emit_label (loop);
+
+ /* Bump the pointers. */
+ emit_insn (gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, cnt)));
+ emit_insn (gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, cnt)));
+
+ rtx vlops1[] = {vec1, vsrc1};
+ rtx vlops2[] = {vec2, vsrc2};
+
+ if (!with_length)
+ {
+ emit_vlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops1);
+
+ emit_vlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops2);
+ }
+ else
+ {
+ nbytes = gen_lowpart (Pmode, nbytes);
+ emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops1, nbytes);
+
+ emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops2, nbytes);
+ }
+
+ /* Read the vl for the next pointer bump. */
+ if (Pmode == SImode)
+ emit_insn (gen_read_vlsi (cnt));
+ else
+ emit_insn (gen_read_vldi_zero_extend (cnt));
+
+ if (with_length)
+ {
+ rtx test_done = gen_rtx_EQ (VOIDmode, cnt, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test_done, cnt, const0_rtx, done));
+ emit_insn (gen_rtx_SET (nbytes, gen_rtx_MINUS (Pmode, nbytes, cnt)));
+ }
+
+ /* Look for a \0 in the first string. */
+ rtx mask0 = gen_reg_rtx (mask_mode);
+ rtx eq0
+ = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, CONST0_RTX (mode)),
+ vec1);
+ rtx vmsops1[] = {mask0, eq0, vec1, CONST0_RTX (mode)};
+ emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode),
+ riscv_vector::COMPARE_OP, vmsops1, cnt);
+
+ /* Look for vec1 != vec2 (includes vec2[i] == 0). */
+ rtx maskne = gen_reg_rtx (mask_mode);
+ rtx ne = gen_rtx_NE (mask_mode, vec1, vec2);
+ rtx vmsops[] = {maskne, ne, vec1, vec2};
+ emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP,
+ vmsops, cnt);
+
+ /* Combine both masks into one. */
+ rtx mask = gen_reg_rtx (mask_mode);
+ rtx vmorops[] = {mask, mask0, maskne};
+ emit_nonvlmax_insn (code_for_pred (IOR, mask_mode),
+ riscv_vector::BINARY_MASK_OP, vmorops, cnt);
+
+ /* Find the first bit in the mask (the first unequal element). */
+ rtx found_at = gen_reg_rtx (Pmode);
+ rtx vfops[] = {found_at, mask};
+ emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode),
+ riscv_vector::CPOP_OP, vfops, cnt);
+
+ /* Emit the loop condition. */
+ rtx test = gen_rtx_LT (VOIDmode, found_at, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test, found_at, const0_rtx, loop));
+
+ /* Walk up to the difference point. */
+ emit_insn (
+ gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, found_at)));
+ emit_insn (
+ gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, found_at)));
+
+ /* Load the respective byte and compute the difference. */
+ rtx c1 = gen_reg_rtx (Pmode);
+ rtx c2 = gen_reg_rtx (Pmode);
+
+ do_load_from_addr (mode, c1, src_addr1, src1);
+ do_load_from_addr (mode, c2, src_addr2, src2);
+
+ do_sub3 (sub, c1, c2);
+
+ if (with_length)
+ emit_label (done);
+
+ emit_insn (gen_movsi (result, gen_lowpart (SImode, sub)));
+ return true;
+}
+
}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6f9dec8c152..eed997116b0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3702,7 +3702,8 @@ (define_expand "cmpstrnsi"
(match_operand:BLK 2)))
(use (match_operand:SI 3))
(use (match_operand:SI 4))])]
- "riscv_inline_strncmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+ "riscv_inline_strncmp && !optimize_size
+ && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
{
if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
operands[3], operands[4]))
@@ -3722,7 +3723,8 @@ (define_expand "cmpstrsi"
(compare:SI (match_operand:BLK 1)
(match_operand:BLK 2)))
(use (match_operand:SI 3))])]
- "riscv_inline_strcmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+ "riscv_inline_strcmp && !optimize_size
+ && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
{
if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
NULL_RTX, operands[3]))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
new file mode 100644
index 00000000000..6dec7da91c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+#include <string.h>
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strcmp (s, t);
+}
+
+int
+__attribute__ ((noipa, optimize ("0")))
+foo2 (const char *s, const char *t)
+{
+ return strcmp (s, t);
+}
+
+#define SZ 10
+
+int main ()
+{
+ const char *s[SZ]
+ = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+ "a", "z", "1", "9", "12345678901234567889012345678901234567890"};
+
+ for (int i = 0; i < SZ; i++)
+ for (int j = 0; j < SZ; j++)
+ if (foo (s[i], s[j]) != foo2 (s[i], s[j]))
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
new file mode 100644
index 00000000000..f9d33a74fc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strcmp (s, t);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 2 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
+/* { dg-final { scan-assembler-times "vmor.m" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
new file mode 100644
index 00000000000..8d1471a3a13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
@@ -0,0 +1,136 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+#include <string.h>
+
+int
+__attribute__ ((noipa, optimize ("0")))
+foo2 (const char *s, const char *t, int n)
+{
+ return strncmp (s, t, n);
+}
+
+#define SZ 11
+
+#define TEST(I, J, N) \
+ int res_##I_##J_##N = __builtin_strncmp (s[I], s[J], N); \
+ int ref_##I_##J_##N = foo2 (s[I], s[J], N); \
+ if (res_##I_##J_##N != ref_##I_##J_##N) \
+ __builtin_abort ();
+
+int main ()
+{
+ const char *s[SZ]
+ = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+ "a", "z", "1", "9", "12345678901234567889012345678901234567890",
+ "ds0fi0349r0sdmfvi0sjf0c9fj034mrx903cw0efmc9jfsicn2390crrm0i90msdfi0sdf0"};
+
+ for (int i = 0; i < SZ; i++)
+ for (int j = 0; j < SZ; j++)
+ {
+ TEST(i, j, 0)
+ TEST(i, j, 1)
+ TEST(i, j, 2)
+ TEST(i, j, 3)
+ TEST(i, j, 4)
+ TEST(i, j, 5)
+ TEST(i, j, 6)
+ TEST(i, j, 7)
+ TEST(i, j, 8)
+ TEST(i, j, 9)
+ TEST(i, j, 10)
+ TEST(i, j, 11)
+ TEST(i, j, 12)
+ TEST(i, j, 13)
+ TEST(i, j, 14)
+ TEST(i, j, 15)
+ TEST(i, j, 16)
+ TEST(i, j, 17)
+ TEST(i, j, 18)
+ TEST(i, j, 19)
+ TEST(i, j, 20)
+ TEST(i, j, 21)
+ TEST(i, j, 22)
+ TEST(i, j, 23)
+ TEST(i, j, 24)
+ TEST(i, j, 25)
+ TEST(i, j, 26)
+ TEST(i, j, 27)
+ TEST(i, j, 28)
+ TEST(i, j, 29)
+ TEST(i, j, 30)
+ TEST(i, j, 31)
+ TEST(i, j, 32)
+ TEST(i, j, 33)
+ TEST(i, j, 34)
+ TEST(i, j, 35)
+ TEST(i, j, 36)
+ TEST(i, j, 37)
+ TEST(i, j, 38)
+ TEST(i, j, 39)
+ TEST(i, j, 40)
+ TEST(i, j, 41)
+ TEST(i, j, 42)
+ TEST(i, j, 43)
+ TEST(i, j, 44)
+ TEST(i, j, 45)
+ TEST(i, j, 46)
+ TEST(i, j, 47)
+ TEST(i, j, 48)
+ TEST(i, j, 49)
+ TEST(i, j, 50)
+ TEST(i, j, 51)
+ TEST(i, j, 52)
+ TEST(i, j, 53)
+ TEST(i, j, 54)
+ TEST(i, j, 55)
+ TEST(i, j, 56)
+ TEST(i, j, 57)
+ TEST(i, j, 58)
+ TEST(i, j, 59)
+ TEST(i, j, 60)
+ TEST(i, j, 61)
+ TEST(i, j, 62)
+ TEST(i, j, 63)
+ TEST(i, j, 64)
+ TEST(i, j, 65)
+ TEST(i, j, 66)
+ TEST(i, j, 67)
+ TEST(i, j, 68)
+ TEST(i, j, 69)
+ TEST(i, j, 70)
+ TEST(i, j, 71)
+ TEST(i, j, 72)
+ TEST(i, j, 73)
+ TEST(i, j, 74)
+ TEST(i, j, 75)
+ TEST(i, j, 76)
+ TEST(i, j, 77)
+ TEST(i, j, 78)
+ TEST(i, j, 79)
+ TEST(i, j, 80)
+ TEST(i, j, 81)
+ TEST(i, j, 82)
+ TEST(i, j, 83)
+ TEST(i, j, 84)
+ TEST(i, j, 85)
+ TEST(i, j, 86)
+ TEST(i, j, 87)
+ TEST(i, j, 88)
+ TEST(i, j, 89)
+ TEST(i, j, 90)
+ TEST(i, j, 91)
+ TEST(i, j, 92)
+ TEST(i, j, 93)
+ TEST(i, j, 94)
+ TEST(i, j, 95)
+ TEST(i, j, 96)
+ TEST(i, j, 97)
+ TEST(i, j, 98)
+ TEST(i, j, 99)
+ TEST(i, j, 100)
+ TEST(i, j, 101)
+ TEST(i, j, 102)
+ TEST(i, j, 103)
+ }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c
new file mode 100644
index 00000000000..a89633ea9d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strncmp" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strncmp (s, t, 7);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 2 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
+/* { dg-final { scan-assembler-times "vmor.m" 1 } } */
--
2.43.0
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
Thanks, which config? For me everything under builtin passes on rv64gcv
and rv32gcv:
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
Regards
Robin
rv64gcv
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-12-09 21:51
To: 钟居哲; gcc-patches; palmer; kito.cheng; Jeff Law
CC: rdapp.gcc
Subject: Re: [PATCH] RISC-V: Add vectorized strcmp.
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
Thanks, which config? For me everything under builtin passes on rv64gcv
and rv32gcv:
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
Regards
Robin
> rv64gcv
With -minline-strcmp I assume?
Regards
Robin
I didn't use any special configuration:
--with-arch=rv64gcv_zvl256b --with-abi=lp64d --test --jobs=64 --with-sim=qemu --enable-gcc-checking=yes,assert,extra,rtlflag,rtl,gimple
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-12-09 22:07
To: 钟居哲; gcc-patches; palmer; kito.cheng; Jeff Law
CC: rdapp.gcc
Subject: Re: [PATCH] RISC-V: Add vectorized strcmp.
> rv64gcv
With -minline-strcmp I assume?
Regards
Robin
FYI. I have the same failures as juzhe mentioned, with the emulator qemu-riscv64 version 8.1.93 (v8.2.0-rc3). The entire log looks like the one below:
Executing on host: /home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/xgcc -B/home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/ /home/box/panli/riscv-gnu-toolchain/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c -march=rv64gcv -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m1 --param=riscv-autovec-preference=fixed-vlmax -fdiagnostics-plain-output -ftree-vectorize -O3 --param riscv-autovec-lmul=m1 -O3 -minline-strcmp -lm -o ./strcmp-run.exe (timeout = 600)
spawn -ignore SIGHUP /home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/xgcc -B/home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/ /home/box/panli/riscv-gnu-toolchain/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c -march=rv64gcv -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m1 --param=riscv-autovec-preference=fixed-vlmax -fdiagnostics-plain-output -ftree-vectorize -O3 --param riscv-autovec-lmul=m1 -O3 -minline-strcmp -lm -o ./strcmp-run.exe^M
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c (test for excess errors)
spawn riscv64-unknown-elf-run ./strcmp-run.exe^M
qemu-riscv64: warning: CPU property 'Zve32f' is deprecated. Please use 'zve32f' instead^M
qemu-riscv64: warning: CPU property 'Zve64f' is deprecated. Please use 'zve64f' instead^M
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
Pan
From: 钟居哲 <juzhe.zhong@rivai.ai>
Sent: Saturday, December 9, 2023 10:18 PM
To: rdapp.gcc <rdapp.gcc@gmail.com>; gcc-patches <gcc-patches@gcc.gnu.org>; palmer <palmer@dabbelt.com>; kito.cheng <kito.cheng@gmail.com>; Jeff Law <jeffreyalaw@gmail.com>
Cc: rdapp.gcc <rdapp.gcc@gmail.com>
Subject: Re: Re: [PATCH] RISC-V: Add vectorized strcmp.
I didn't use any special configuration:
--with-arch=rv64gcv_zvl256b --with-abi=lp64d --test --jobs=64 --with-sim=qemu --enable-gcc-checking=yes,assert,extra,rtlflag,rtl,gimple
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>
From: Robin Dapp<mailto:rdapp.gcc@gmail.com>
Date: 2023-12-09 22:07
To: 钟居哲<mailto:juzhe.zhong@rivai.ai>; gcc-patches<mailto:gcc-patches@gcc.gnu.org>; palmer<mailto:palmer@dabbelt.com>; kito.cheng<mailto:kito.cheng@gmail.com>; Jeff Law<mailto:jeffreyalaw@gmail.com>
CC: rdapp.gcc<mailto:rdapp.gcc@gmail.com>
Subject: Re: [PATCH] RISC-V: Add vectorized strcmp.
> rv64gcv
With -minline-strcmp I assume?
Regards
Robin
> FYI. I have the same failures as juzhe mentioned, with the emulator
> qemu version qemu-riscv64 version 8.1.93 (v8.2.0-rc3). The entire log
> may look like below:
>
> Executing on host: /home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/xgcc -B/home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/ /home/box/panli/riscv-gnu-toolchain/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c -march=rv64gcv -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m1 --param=riscv-autovec-preference=fixed-vlmax -fdiagnostics-plain-output -ftree-vectorize -O3 --param riscv-autovec-lmul=m1 -O3 -minline-strcmp -lm -o ./strcmp-run.exe (timeout = 600)
>
> spawn -ignore SIGHUP /home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/xgcc -B/home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/ /home/box/panli/riscv-gnu-toolchain/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c -march=rv64gcv -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m1 --param=riscv-autovec-preference=fixed-vlmax -fdiagnostics-plain-output -ftree-vectorize -O3 --param riscv-autovec-lmul=m1 -O3 -minline-strcmp -lm -o ./strcmp-run.exe^M
Thanks, it must be a bug if you both see it. But I cannot reproduce
it yet for some reason. I tried your exact parameters (just didn't
use newlib). Also, for Juzhe it seemed to fail without
-minline-strcmp for you it fails with it. Maybe my testcase uses
undefined behavior? Could you try reducing SZ to 1 for a test?
Regards
Robin
Hi Robin,
I reduced the SZ size from 10 to 1, and the below case with SZ = 2 will fail. The failed location is "foo is 50, foo2 is 12800, i,j is 1, 0".
#define SZ 2
const char *s[SZ] = {"1", "12345678901234567889012345678901234567890"};
Executing on host: /home/pli/gcc/111/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/xgcc -B/home/pli/gcc/111/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/ exceptions_enabled4031601.cc -march=rv64gcv_zvl512b -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m4 -fdiagnostics-plain-output -Wno-complain-wrong-lang -S -o exceptions_enabled4031601.s (timeout = 600)
spawn -ignore SIGHUP /home/pli/gcc/111/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/xgcc -B/home/pli/gcc/111/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/ exceptions_enabled4031601.cc -march=rv64gcv_zvl512b -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m4 -fdiagnostics-plain-output -Wno-complain-wrong-lang -S -o exceptions_enabled4031601.s
PASS: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c (test for excess errors)
spawn riscv64-unknown-elf-run ./strcmp-run.exe
+ QEMU_CPU=rv64,vlen=512,v=true,vext_spec=v1.0,Zve32f=true,Zve64f=true
+ qemu-riscv64 -r 5.10 -L /home/pli/gcc/111/riscv-gnu-toolchain/__RISC-V_INSTALL___/sysroot ./strcmp-run.exe
qemu-riscv64: warning: CPU property 'Zve32f' is deprecated. Please use 'zve32f' instead
qemu-riscv64: warning: CPU property 'Zve64f' is deprecated. Please use 'zve64f' instead
foo is 50, foo2 is 12800, i,j is 1, 0
FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test
Pan
-----Original Message-----
From: Robin Dapp <rdapp.gcc@gmail.com>
Sent: Monday, December 11, 2023 4:34 PM
To: Li, Pan2 <pan2.li@intel.com>; 钟居哲 <juzhe.zhong@rivai.ai>; gcc-patches <gcc-patches@gcc.gnu.org>; palmer <palmer@dabbelt.com>; kito.cheng <kito.cheng@gmail.com>; Jeff Law <jeffreyalaw@gmail.com>
Cc: rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Add vectorized strcmp.
> FYI. I have the same failures as juzhe mentioned, with the emulator
> qemu version qemu-riscv64 version 8.1.93 (v8.2.0-rc3). The entire log
> may look like below:
>
> Executing on host: /home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/xgcc -B/home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/ /home/box/panli/riscv-gnu-toolchain/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c -march=rv64gcv -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m1 --param=riscv-autovec-preference=fixed-vlmax -fdiagnostics-plain-output -ftree-vectorize -O3 --param riscv-autovec-lmul=m1 -O3 -minline-strcmp -lm -o ./strcmp-run.exe (timeout = 600)
>
> spawn -ignore SIGHUP /home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/xgcc -B/home/box/panli/riscv-gnu-toolchain/build-gcc-newlib-stage2/gcc/ /home/box/panli/riscv-gnu-toolchain/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c -march=rv64gcv -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m1 --param=riscv-autovec-preference=fixed-vlmax -fdiagnostics-plain-output -ftree-vectorize -O3 --param riscv-autovec-lmul=m1 -O3 -minline-strcmp -lm -o ./strcmp-run.exe^M
Thanks, it must be a bug if you both see it. But I cannot reproduce
it yet for some reason. I tried your exact parameters (just didn't
use newlib). Also, for Juzhe it seemed to fail without
-minline-strcmp for you it fails with it. Maybe my testcase uses
undefined behavior? Could you try reducing SZ to 1 for a test?
Regards
Robin
Hi Pan,
> I reduced the SZ size from 10 to 1, and the below case with SZ = 2
> will fail. The failed location is "foo is 50, foo2 is 12800, i,j is
> 1, 0".
>
> #define SZ 2
>
> const char *s[SZ] = {"1",
> "12345678901234567889012345678901234567890"};
Thanks. I still cannot reproduce but I think the reason is that
foo2 (so the reference) does something different with newlib as
opposed to libc.
Could you try if the attached helps for you?
Regards
Robin
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
index 6dec7da91c1..adbe022e0ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
@@ -1,8 +1,6 @@
/* { dg-do run } */
/* { dg-additional-options "-O3 -minline-strcmp" } */
-#include <string.h>
-
int
__attribute__ ((noipa))
foo (const char *s, const char *t)
@@ -10,23 +8,26 @@ foo (const char *s, const char *t)
return __builtin_strcmp (s, t);
}
-int
-__attribute__ ((noipa, optimize ("0")))
-foo2 (const char *s, const char *t)
-{
- return strcmp (s, t);
-}
-
#define SZ 10
-int main ()
+int
+main ()
{
const char *s[SZ]
= {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
"a", "z", "1", "9", "12345678901234567889012345678901234567890"};
+ const int ref[SZ * SZ]
+ = {0, -97, -48, 0, -33, -97, -122, -49, -57, -49, 97, 0, 49, 97, 64,
+ 115, -25, 48, 40, 48, 48, -49, 0, 48, 15, -49, -74, -1, -9, -1,
+ 0, -97, -48, 0, -33, -97, -122, -49, -57, -49, 33, -64, -15, 33, 0,
+ -64, -89, -16, -24, -16, 97, -115, 49, 97, 64, 0, -25, 48, 40, 48,
+ 122, 25, 74, 122, 89, 25, 0, 73, 65, 73, 49, -48, 1, 49, 16,
+ -48, -73, 0, -8, -50, 57, -40, 9, 57, 24, -40, -65, 8, 0, 8,
+ 49, -48, 1, 49, 16, -48, -73, 50, -8, 0};
+
for (int i = 0; i < SZ; i++)
for (int j = 0; j < SZ; j++)
- if (foo (s[i], s[j]) != foo2 (s[i], s[j]))
+ if (foo (s[i], s[j]) != ref [i * SZ + j])
__builtin_abort ();
}
Yes, I tested the patch with all of the configurations below and there are no failures now. That would be great!
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
Pan
-----Original Message-----
From: Robin Dapp <rdapp.gcc@gmail.com>
Sent: Monday, December 11, 2023 9:15 PM
To: Li, Pan2 <pan2.li@intel.com>; 钟居哲 <juzhe.zhong@rivai.ai>; gcc-patches <gcc-patches@gcc.gnu.org>; palmer <palmer@dabbelt.com>; kito.cheng <kito.cheng@gmail.com>; Jeff Law <jeffreyalaw@gmail.com>
Cc: rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Add vectorized strcmp.
Hi Pan,
> I reduced the SZ size from 10 to 1, and the below case with SZ = 2
> will fail. The failed location is "foo is 50, foo2 is 12800, i,j is
> 1, 0".
>
> #define SZ 2
>
> const char *s[SZ] = {"1",
> "12345678901234567889012345678901234567890"};
Thanks. I still cannot reproduce but I think the reason is that
foo2 (so the reference) does something different with newlib as
opposed to libc.
Could you try if the attached helps for you?
Regards
Robin
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
index 6dec7da91c1..adbe022e0ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
@@ -1,8 +1,6 @@
/* { dg-do run } */
/* { dg-additional-options "-O3 -minline-strcmp" } */
-#include <string.h>
-
int
__attribute__ ((noipa))
foo (const char *s, const char *t)
@@ -10,23 +8,26 @@ foo (const char *s, const char *t)
return __builtin_strcmp (s, t);
}
-int
-__attribute__ ((noipa, optimize ("0")))
-foo2 (const char *s, const char *t)
-{
- return strcmp (s, t);
-}
-
#define SZ 10
-int main ()
+int
+main ()
{
const char *s[SZ]
= {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
"a", "z", "1", "9", "12345678901234567889012345678901234567890"};
+ const int ref[SZ * SZ]
+ = {0, -97, -48, 0, -33, -97, -122, -49, -57, -49, 97, 0, 49, 97, 64,
+ 115, -25, 48, 40, 48, 48, -49, 0, 48, 15, -49, -74, -1, -9, -1,
+ 0, -97, -48, 0, -33, -97, -122, -49, -57, -49, 33, -64, -15, 33, 0,
+ -64, -89, -16, -24, -16, 97, -115, 49, 97, 64, 0, -25, 48, 40, 48,
+ 122, 25, 74, 122, 89, 25, 0, 73, 65, 73, 49, -48, 1, 49, 16,
+ -48, -73, 0, -8, -50, 57, -40, 9, 57, 24, -40, -65, 8, 0, 8,
+ 49, -48, 1, 49, 16, -48, -73, 50, -8, 0};
+
for (int i = 0; i < SZ; i++)
for (int j = 0; j < SZ; j++)
- if (foo (s[i], s[j]) != foo2 (s[i], s[j]))
+ if (foo (s[i], s[j]) != ref [i * SZ + j])
__builtin_abort ();
}
> Yes, I tested the patch with all of the configurations below and there are no failures now. That would be great!
Thank you! I posted it as a patch now:
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640182.html
Regards
Robin
@@ -558,6 +558,7 @@ void expand_cond_binop (unsigned, rtx *);
void expand_cond_ternop (unsigned, rtx *);
void expand_popcount (rtx *);
void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
+bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, poly_int64);
/* Rounding mode bitfield for fixed point VXRM. */
@@ -511,12 +511,19 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2,
return false;
alignment = UINTVAL (align_rtx);
- if (TARGET_ZBB || TARGET_XTHEADBB)
+ if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR)
{
- return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
- ncompare);
+ bool ok = riscv_vector::expand_strcmp (result, src1, src2,
+ bytes_rtx, alignment,
+ ncompare);
+ if (ok)
+ return true;
}
+ if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR)
+ return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
+ ncompare);
+
return false;
}
@@ -1092,4 +1099,152 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle,
}
}
+/* Implement cmpstr<mode> (NBYTES == NULL_RTX) or its length-limited variant (NBYTES given) using vector instructions. RESULT receives the SImode low part of the byte difference computed at the first mismatch or terminator. Returns false for big-endian targets or when NBYTES is not a REG, SUBREG or CONST_INT, and true once the sequence has been emitted. The ALIGNMENT and
+ NCOMPARE parameters are unused for now. */
+
+bool
+expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
+ unsigned HOST_WIDE_INT, bool)
+{
+ gcc_assert (TARGET_VECTOR);
+
+ /* We don't support big endian. */
+ if (BYTES_BIG_ENDIAN)
+ return false;
+
+ bool with_length = nbytes != NULL_RTX;
+
+ if (with_length
+ && (!REG_P (nbytes) && !SUBREG_P (nbytes) && !CONST_INT_P (nbytes)))
+ return false;
+
+ if (with_length && CONST_INT_P (nbytes))
+ nbytes = force_reg (Pmode, nbytes);
+
+ machine_mode mode = E_QImode;
+ unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
+ int lmul = TARGET_MAX_LMUL;
+ poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
+
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (GET_MODE_INNER (mode), nunits)
+ .exists (&vmode))
+ gcc_unreachable ();
+
+ machine_mode mask_mode = riscv_vector::get_mask_mode (vmode);
+
+ /* Prepare addresses: copy each string address into a register and form a VMODE memory reference based on it. */
+ rtx src_addr1 = copy_addr_to_reg (XEXP (src1, 0));
+ rtx vsrc1 = change_address (src1, vmode, src_addr1);
+
+ rtx src_addr2 = copy_addr_to_reg (XEXP (src2, 0));
+ rtx vsrc2 = change_address (src2, vmode, src_addr2);
+
+ /* Set initial pointer bump to 0 so the first iteration leaves the addresses unchanged. */
+ rtx cnt = gen_reg_rtx (Pmode);
+ emit_move_insn (cnt, CONST0_RTX (Pmode));
+
+ rtx sub = gen_reg_rtx (Pmode);
+ emit_move_insn (sub, CONST0_RTX (Pmode)); /* Still 0 when the with-length loop exits via DONE. */
+
+ /* Create source vectors. */
+ rtx vec1 = gen_reg_rtx (vmode);
+ rtx vec2 = gen_reg_rtx (vmode);
+
+ rtx done = gen_label_rtx ();
+ rtx loop = gen_label_rtx ();
+ emit_label (loop);
+
+ /* Bump the pointers by CNT, the vl read in the previous iteration. */
+ emit_insn (gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, cnt)));
+ emit_insn (gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, cnt)));
+
+ rtx vlops1[] = {vec1, vsrc1};
+ rtx vlops2[] = {vec2, vsrc2};
+
+ if (!with_length)
+ {
+ emit_vlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops1);
+
+ emit_vlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops2);
+ }
+ else
+ {
+ nbytes = gen_lowpart (Pmode, nbytes);
+ emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops1, nbytes);
+
+ emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+ riscv_vector::UNARY_OP, vlops2, nbytes);
+ }
+
+ /* Read the vl for the next pointer bump; it is also the length used for the mask operations below. */
+ if (Pmode == SImode)
+ emit_insn (gen_read_vlsi (cnt));
+ else
+ emit_insn (gen_read_vldi_zero_extend (cnt));
+
+ if (with_length)
+ {
+ rtx test_done = gen_rtx_EQ (VOIDmode, cnt, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test_done, cnt, const0_rtx, done)); /* vl of 0: leave the loop. */
+ emit_insn (gen_rtx_SET (nbytes, gen_rtx_MINUS (Pmode, nbytes, cnt))); /* Remaining length. */
+ }
+
+ /* Look for a \0 in the first string. */
+ rtx mask0 = gen_reg_rtx (mask_mode);
+ rtx eq0
+ = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, CONST0_RTX (mode)),
+ vec1);
+ rtx vmsops1[] = {mask0, eq0, vec1, CONST0_RTX (mode)};
+ emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode),
+ riscv_vector::COMPARE_OP, vmsops1, cnt);
+
+ /* Look for vec1 != vec2 (includes vec2[i] == 0). */
+ rtx maskne = gen_reg_rtx (mask_mode);
+ rtx ne = gen_rtx_NE (mask_mode, vec1, vec2);
+ rtx vmsops[] = {maskne, ne, vec1, vec2};
+ emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP,
+ vmsops, cnt);
+
+ /* Combine both masks into one (MASK0 | MASKNE). */
+ rtx mask = gen_reg_rtx (mask_mode);
+ rtx vmorops[] = {mask, mask0, maskne};
+ emit_nonvlmax_insn (code_for_pred (IOR, mask_mode),
+ riscv_vector::BINARY_MASK_OP, vmorops, cnt);
+
+ /* Find the first set bit in the combined mask (the first \0 in string 1 or the first unequal element). */
+ rtx found_at = gen_reg_rtx (Pmode);
+ rtx vfops[] = {found_at, mask};
+ emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode),
+ riscv_vector::CPOP_OP, vfops, cnt);
+
+ /* Emit the loop condition: branch back to LOOP while FOUND_AT is negative (presumably meaning no mask bit was set). */
+ rtx test = gen_rtx_LT (VOIDmode, found_at, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test, found_at, const0_rtx, loop));
+
+ /* Walk up to the difference point. */
+ emit_insn (
+ gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, found_at)));
+ emit_insn (
+ gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, found_at)));
+
+ /* Load the respective byte and compute the difference. */
+ rtx c1 = gen_reg_rtx (Pmode);
+ rtx c2 = gen_reg_rtx (Pmode);
+
+ do_load_from_addr (mode, c1, src_addr1, src1);
+ do_load_from_addr (mode, c2, src_addr2, src2);
+
+ do_sub3 (sub, c1, c2);
+
+ if (with_length)
+ emit_label (done);
+
+ emit_insn (gen_movsi (result, gen_lowpart (SImode, sub)));
+ return true;
+}
+
}
@@ -3726,7 +3726,8 @@ (define_expand "cmpstrsi"
(compare:SI (match_operand:BLK 1)
(match_operand:BLK 2)))
(use (match_operand:SI 3))])]
- "riscv_inline_strcmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+ "riscv_inline_strcmp && !optimize_size
+ && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
{
if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
NULL_RTX, operands[3]))
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+#include <string.h>
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strcmp (s, t);
+}
+
+int
+__attribute__ ((noipa, optimize ("0")))
+foo2 (const char *s, const char *t)
+{
+ return strcmp (s, t);
+}
+
+#define SZ 10
+
+int main ()
+{
+ const char *s[SZ]
+ = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+ "a", "z", "1", "9", "12345678901234567889012345678901234567890"};
+
+ for (int i = 0; i < SZ; i++)
+ for (int j = 0; j < SZ; j++)
+ if (foo (s[i], s[j]) != foo2 (s[i], s[j]))
+ __builtin_abort ();
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+ return __builtin_strcmp (s, t);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 2 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
+/* { dg-final { scan-assembler-times "vmor.m" 1 } } */