Hi,
As PR109069 shows, commit r12-6537-g080a06fcb076b3 which
introduces define_insn_and_split sldoi_to_mov adopts
easy_vector_constant for const vector of interest, but it's
wrong since predicate easy_vector_constant doesn't guarantee
each byte in the const vector is the same. One counter
example is the const vector in pr109069-1.c. This patch is
to introduce new predicate const_vector_each_byte_same to
ensure all bytes in the given const vector are the same by
considering both int and float, meanwhile for the constants
which don't meet easy_vector_constant we need to gen a move
instead of just a set, and uses VECTOR_MEM_ALTIVEC_OR_VSX_P
rather than VECTOR_UNIT_ALTIVEC_OR_VSX_P for V2DImode support
under VSX since vector long long type of vec_sld is guarded
under stanza vsx.
Bootstrapped and regtested on powerpc64-linux-gnu P7/P8/P9
and powerpc64le-linux-gnu P9 and P10.
Is it ok for trunk?
BR,
Kewen
-----
PR target/109069
gcc/ChangeLog:
* config/rs6000/altivec.md (sldoi_to_mov<mode>): Replace predicate
easy_vector_constant with const_vector_each_byte_same, add
handlings in preparation for !easy_vector_constant, and update
VECTOR_UNIT_ALTIVEC_OR_VSX_P with VECTOR_MEM_ALTIVEC_OR_VSX_P.
* config/rs6000/predicates.md (const_vector_each_byte_same): New
predicate.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr109069-1.c: New test.
* gcc.target/powerpc/pr109069-2-run.c: New test.
* gcc.target/powerpc/pr109069-2.c: New test.
* gcc.target/powerpc/pr109069-2.h: New test.
---
gcc/config/rs6000/altivec.md | 14 +++-
gcc/config/rs6000/predicates.md | 37 +++++++++
gcc/testsuite/gcc.target/powerpc/pr109069-1.c | 25 ++++++
.../gcc.target/powerpc/pr109069-2-run.c | 50 +++++++++++
gcc/testsuite/gcc.target/powerpc/pr109069-2.c | 12 +++
gcc/testsuite/gcc.target/powerpc/pr109069-2.h | 83 +++++++++++++++++++
6 files changed, 218 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109069-1.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109069-2.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109069-2.h
--
2.31.1
Hi!
On Mon, Mar 27, 2023 at 04:09:39PM +0800, Kewen.Lin wrote:
> As PR109069 shows, commit r12-6537-g080a06fcb076b3 which
> introduces define_insn_and_split sldoi_to_mov adopts
> easy_vector_constant for const vector of interest, but it's
> wrong since predicate easy_vector_constant doesn't guarantee
> each byte in the const vector is the same.
> * config/rs6000/altivec.md (sldoi_to_mov<mode>): Replace predicate
> easy_vector_constant with const_vector_each_byte_same, add
> handlings in preparation for !easy_vector_constant, and update
> VECTOR_UNIT_ALTIVEC_OR_VSX_P with VECTOR_MEM_ALTIVEC_OR_VSX_P.
> * config/rs6000/predicates.md (const_vector_each_byte_same): New
> predicate.
Okay for trunk. Let's backport this to 13 once 13.1 has been released,
this patch is not very trivial so there is some risk.
Thanks!
Segher
@@ -385,14 +385,22 @@ (define_split
(define_insn_and_split "sldoi_to_mov<mode>"
[(set (match_operand:VM 0 "altivec_register_operand")
- (unspec:VM [(match_operand:VM 1 "easy_vector_constant")
+ (unspec:VM [(match_operand:VM 1 "const_vector_each_byte_same")
(match_dup 1)
(match_operand:QI 2 "u5bit_cint_operand")]
UNSPEC_VSLDOI))]
- "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()"
"#"
"&& 1"
- [(set (match_dup 0) (match_dup 1))])
+ [(set (match_dup 0) (match_dup 1))]
+ "{
+ if (!easy_vector_constant (operands[1], <MODE>mode))
+ {
+ rtx dest = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (dest, operands[1]);
+ operands[1] = dest;
+ }
+ }")
(define_insn "get_vrsave_internal"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -798,6 +798,43 @@ (define_predicate "easy_vector_constant_vsldoi"
(and (match_test "easy_altivec_constant (op, mode)")
(match_test "vspltis_shifted (op) != 0")))))
+;; Return true if this is a vector constant and each byte in
+;; it is the same.
+(define_predicate "const_vector_each_byte_same"
+ (match_code "const_vector")
+{
+ rtx elt;
+ if (!const_vec_duplicate_p (op, &elt))
+ return false;
+
+ machine_mode emode = GET_MODE_INNER (mode);
+ unsigned HOST_WIDE_INT eval;
+ if (CONST_INT_P (elt))
+ eval = INTVAL (elt);
+ else if (CONST_DOUBLE_AS_FLOAT_P (elt))
+ {
+ gcc_assert (emode == SFmode || emode == DFmode);
+ long l[2];
+ real_to_target (l, CONST_DOUBLE_REAL_VALUE (elt), emode);
+ /* real_to_target puts 32-bit pieces in each long. */
+ eval = zext_hwi (l[0], 32);
+ eval |= zext_hwi (l[1], 32) << 32;
+ }
+ else
+ return false;
+
+ unsigned int esize = GET_MODE_SIZE (emode);
+ unsigned char byte0 = eval & 0xff;
+ for (unsigned int i = 1; i < esize; i++)
+ {
+ eval >>= BITS_PER_UNIT;
+ if (byte0 != (eval & 0xff))
+ return false;
+ }
+
+ return true;
+})
+
;; Return 1 if operand is a vector int register or is either a vector constant
;; of all 0 bits of a vector constant of all 1 bits.
(define_predicate "vector_int_reg_or_same_bit"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-O2 -maltivec" } */
+
+/* Verify it run successfully. */
+
+#include <altivec.h>
+
+__attribute__ ((noipa))
+vector signed int
+test ()
+{
+ vector signed int v = {-16, -16, -16, -16};
+ vector signed int res = vec_sld (v, v, 3);
+ return res;
+}
+
+int
+main ()
+{
+ vector signed int res = test ();
+ if (res[0] != 0xf0ffffff)
+ __builtin_abort ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Verify it doesn't generate wrong code. */
+
+#include "pr109069-2.h"
+
+int
+main ()
+{
+ vector unsigned char res1 = test1 ();
+ for (int i = 0; i < 16; i++)
+ if (res1[i] != 0xd)
+ __builtin_abort ();
+
+ vector signed short res2 = test2 ();
+ for (int i = 0; i < 8; i++)
+ if (res2[i] != 0x7777)
+ __builtin_abort ();
+
+ vector signed int res3 = test3 ();
+ vector unsigned int res4 = test4 ();
+ vector float res6 = test6 ();
+ for (int i = 0; i < 4; i++)
+ {
+ if (res3[i] != 0xbbbbbbbb)
+ __builtin_abort ();
+ if (res4[i] != 0x7070707)
+ __builtin_abort ();
+ U32b u;
+ u.f = res6[i];
+ if (u.i != 0x17171717)
+ __builtin_abort ();
+ }
+
+ vector unsigned long long res5 = test5 ();
+ vector double res7 = test7 ();
+ for (int i = 0; i < 2; i++)
+ {
+ if (res5[i] != 0x4545454545454545ll)
+ __builtin_abort ();
+ U64b u;
+ u.f = res7[i];
+ if (u.i != 0x5454545454545454ll)
+ __builtin_abort ();
+ }
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* Disable rs6000 optimize_swaps as it drops some REG_EQUAL
+ notes on const vector and affects test point here. */
+/* { dg-options "-O2 -mvsx -mno-optimize-swaps" } */
+
+/* Verify we can optimize away vector shifting if every byte
+ of vector is the same. */
+
+#include "pr109069-2.h"
+
+/* { dg-final { scan-assembler-not {\mvsldoi\M} } } */
new file mode 100644
@@ -0,0 +1,83 @@
+#include <altivec.h>
+
+typedef union
+{
+ unsigned int i;
+ float f;
+} U32b;
+
+typedef union
+{
+ unsigned long long i;
+ double f;
+} U64b;
+
+__attribute__ ((noipa))
+vector unsigned char
+test1 ()
+{
+ vector unsigned char v = {0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd,
+ 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd};
+ vector unsigned char res = vec_sld (v, v, 3);
+ return res;
+}
+
+__attribute__ ((noipa))
+vector signed short
+test2 ()
+{
+ vector signed short v
+ = {0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777};
+ vector signed short res = vec_sld (v, v, 5);
+ return res;
+}
+
+__attribute__ ((noipa))
+vector signed int
+test3 ()
+{
+ vector signed int v = {0xbbbbbbbb, 0xbbbbbbbb, 0xbbbbbbbb, 0xbbbbbbbb};
+ vector signed int res = vec_sld (v, v, 7);
+ return res;
+}
+
+__attribute__ ((noipa))
+vector unsigned int
+test4 ()
+{
+ vector unsigned int v = {0x07070707, 0x07070707, 0x07070707, 0x07070707};
+ vector unsigned int res = vec_sld (v, v, 9);
+ return res;
+}
+
+__attribute__ ((noipa))
+vector unsigned long long
+test5 ()
+{
+ vector unsigned long long v = {0x4545454545454545ll, 0x4545454545454545ll};
+ vector unsigned long long res = vec_sld (v, v, 10);
+ return res;
+}
+
+__attribute__ ((noipa))
+vector float
+test6 ()
+{
+ U32b u;
+ u.i = 0x17171717;
+ vector float vf = {u.f, u.f, u.f, u.f};
+ vector float res = vec_sld (vf, vf, 11);
+ return res;
+}
+
+__attribute__ ((noipa))
+vector double
+test7 ()
+{
+ U64b u;
+ u.i = 0x5454545454545454ll;
+ vector double vf = {u.f, u.f};
+ vector double res = vec_sld (vf, vf, 13);
+ return res;
+}
+