[PATCHv4,rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]

Message ID 8ae0f5d3-9e9f-3880-e651-34df2a8c4361@linux.ibm.com
State Accepted
Headers
Series [PATCHv4,rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124] |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

HAO CHEN GUI June 25, 2023, 2:09 a.m. UTC
  Hi,
  This patch adds a new insn for splatting small V2DI constants on P8.
If the constant's value is in the range [-16, 15] and is not 0 or -1, it can
be loaded with vspltisw and vupkhsw on P8. This should be more efficient than
loading the vector from memory.

  Compared to the last version, the main change is to remove the new
constraint, use a super constraint in the insn instead, and move the check
into the insn condition.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-06-25  Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	PR target/104124
	* config/rs6000/altivec.md (*altivec_vupkhs<VU_char>_direct): Rename
	to...
	(altivec_vupkhs<VU_char>_direct): ...this.
	* config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New
	predicate to test if a constant can be loaded with vspltisw and
	vupkhsw.
	(easy_vector_constant): Call vspltisw_vupkhsw_constant_p to check if
	a vector constant can be synthesized with a vspltisw and a vupkhsw.
	* config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare.
	* config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function
	to return true if OP mode is V2DI and can be synthesized with vupkhsw
	and vspltisw.
	* config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
	constants with vspltisw and vupkhsw.

gcc/testsuite/
	PR target/104124
	* gcc.target/powerpc/pr104124.c: New.

patch.diff
  

Comments

Kewen.Lin June 26, 2023, 6:07 a.m. UTC | #1
Hi Haochen,

on 2023/6/25 10:09, HAO CHEN GUI wrote:
> Hi,
>   This patch adds a new insn for splatting small V2DI constants on P8.
> If the constant's value is in the range [-16, 15] and is not 0 or -1, it can
> be loaded with vspltisw and vupkhsw on P8. This should be more efficient than
> loading the vector from memory.
> 
>   Compared to the last version, the main change is to remove the new
> constraint, use a super constraint in the insn instead, and move the check
> into the insn condition.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

This patch is ok for trunk, thanks!

BR,
Kewen

> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> 2023-06-25  Haochen Gui <guihaoc@linux.ibm.com>
> 
> gcc/
> 	PR target/104124
> 	* config/rs6000/altivec.md (*altivec_vupkhs<VU_char>_direct): Rename
> 	to...
> 	(altivec_vupkhs<VU_char>_direct): ...this.
> 	* config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New
> 	predicate to test if a constant can be loaded with vspltisw and
> 	vupkhsw.
> 	(easy_vector_constant): Call vspltisw_vupkhsw_constant_p to check if
> 	a vector constant can be synthesized with a vspltisw and a vupkhsw.
> 	* config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare.
> 	* config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): New function
> 	to return true if OP mode is V2DI and can be synthesized with vupkhsw
> 	and vspltisw.
> 	* config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
> 	constants with vspltisw and vupkhsw.
> 
> gcc/testsuite/
> 	PR target/104124
> 	* gcc.target/powerpc/pr104124.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 49b0c964f4d..2c932854c33 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs<VU_char>"
>  }
>    [(set_attr "type" "vecperm")])
> 
> -(define_insn "*altivec_vupkhs<VU_char>_direct"
> +(define_insn "altivec_vupkhs<VU_char>_direct"
>    [(set (match_operand:VP 0 "register_operand" "=v")
>  	(unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
>  		     UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 52c65534e51..f62a4d9b506 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -694,6 +694,12 @@ (define_predicate "xxspltib_constant_split"
>    return num_insns > 1;
>  })
> 
> +;; Return true if the operand is a const_vector that can be synthesized with a
> +;; vspltisw followed by a vupkhsw, as decided by vspltisw_vupkhsw_constant_p.
> +
> +(define_predicate "vspltisw_vupkhsw_constant_split"
> +  (and (match_code "const_vector")
> +       (match_test "vspltisw_vupkhsw_constant_p (op, mode)")))
> 
>  ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
>  ;; instruction.
> @@ -742,6 +748,11 @@ (define_predicate "easy_vector_constant"
>            && xxspltib_constant_p (op, mode, &num_insns, &value))
>  	return true;
> 
> +      /* V2DI constant within RANGE (-16, 15) can be synthesized with a
> +	 vspltisw and a vupkhsw.  */
> +      if (vspltisw_vupkhsw_constant_p (op, mode, &value))
> +	return true;
> +
>        return easy_altivec_constant (op, mode);
>      }
> 
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 1a4fc1df668..00cb2d82953 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
> 
>  extern int easy_altivec_constant (rtx, machine_mode);
>  extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
> +extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
>  extern int vspltis_shifted (rtx);
>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 3be5860dd9b..ae34a02b282 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op,
>    return true;
>  }
> 
> +/* Return true if OP is a V2DImode splat of a constant accepted by
> +   EASY_VECTOR_15 (i.e. within -16..15) other than 0 and -1, so that it can
> +   be synthesized with the ISA 2.07 instructions vspltisw and vupkhsw.  If
> +   CONSTANT_PTR is non-null, the splatted value is returned through it.  */
> +
> +bool
> +vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
> +{
> +  HOST_WIDE_INT value;
> +  rtx elt;
> +
> +  if (!TARGET_P8_VECTOR)
> +    return false;
> +
> +  if (mode != V2DImode)
> +    return false;
> +
> +  if (!const_vec_duplicate_p (op, &elt))
> +    return false;
> +
> +  value = INTVAL (elt);
> +  if (value == 0 || value == -1  /* 0/-1: a single insn suffices.  */
> +      || !EASY_VECTOR_15 (value))
> +    return false;
> +
> +  if (constant_ptr)
> +    *constant_ptr = (int) value;
> +  return true;
> +}
> +
>  const char *
>  output_vec_const_move (rtx *operands)
>  {
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 7d845df5c2d..4919b073e50 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -1174,6 +1174,30 @@ (define_insn_and_split "*xxspltib_<mode>_split"
>    [(set_attr "type" "vecperm")
>     (set_attr "length" "8")])
> 
> +(define_insn_and_split "*vspltisw_v2di_split"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> +	(match_operand:V2DI 1 "vspltisw_vupkhsw_constant_split" "W"))]
> +  "TARGET_P8_VECTOR && vspltisw_vupkhsw_constant_split (operands[1], V2DImode)"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  rtx op0 = operands[0];
> +  rtx op1 = operands[1];
> +  rtx tmp = can_create_pseudo_p ()
> +	    ? gen_reg_rtx (V4SImode)
> +	    : gen_lowpart (V4SImode, op0);  /* No pseudos: reuse op0.  */
> +  int value;
> +  /* The insn condition already accepted op1; this fetches the splat value.  */
> +  vspltisw_vupkhsw_constant_p (op1, V2DImode, &value);
> +  emit_insn (gen_altivec_vspltisw (tmp, GEN_INT (value)));
> +  emit_insn (gen_altivec_vupkhsw_direct (op0, tmp));
> +
> +  DONE;
> +}
> +  [(set_attr "type" "vecperm")
> +   (set_attr "length" "8")])
> +
> 
>  ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
>  ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr104124.c b/gcc/testsuite/gcc.target/powerpc/pr104124.c
> new file mode 100644
> index 00000000000..30e3b6f86eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr104124.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mdejagnu-cpu=power8 -mpower8-vector -O2" } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-final { scan-assembler {\mvspltisw\M} } } */
> +/* { dg-final { scan-assembler {\mvupkhsw\M} } } */
> +/* { dg-final { scan-assembler-not {\mlvx\M} } } */
> +
> +#include <altivec.h>
> +
> +/* PR target/104124: a small V2DI splat constant should be built with
> +   vspltisw and vupkhsw rather than loaded from memory with lvx.  */
> +
> +vector unsigned long long
> +foo (void)
> +{
> +  return vec_splats ((unsigned long long) 12);
> +}
  

Patch

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 49b0c964f4d..2c932854c33 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2542,7 +2542,7 @@  (define_insn "altivec_vupkhs<VU_char>"
 }
   [(set_attr "type" "vecperm")])

-(define_insn "*altivec_vupkhs<VU_char>_direct"
+(define_insn "altivec_vupkhs<VU_char>_direct"
   [(set (match_operand:VP 0 "register_operand" "=v")
 	(unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
 		     UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..f62a4d9b506 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -694,6 +694,12 @@  (define_predicate "xxspltib_constant_split"
   return num_insns > 1;
 })

+;; Return true if the operand is a const_vector that can be synthesized with a
+;; vspltisw followed by a vupkhsw, as decided by vspltisw_vupkhsw_constant_p.
+
+(define_predicate "vspltisw_vupkhsw_constant_split"
+  (and (match_code "const_vector")
+       (match_test "vspltisw_vupkhsw_constant_p (op, mode)")))

 ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
 ;; instruction.
@@ -742,6 +748,11 @@  (define_predicate "easy_vector_constant"
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;

+      /* V2DI constant within RANGE (-16, 15) can be synthesized with a
+	 vspltisw and a vupkhsw.  */
+      if (vspltisw_vupkhsw_constant_p (op, mode, &value))
+	return true;
+
       return easy_altivec_constant (op, mode);
     }

diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..00cb2d82953 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@  extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,

 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..ae34a02b282 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6638,6 +6638,36 @@  xxspltib_constant_p (rtx op,
   return true;
 }

+/* Return true if OP is a V2DImode splat of a constant accepted by
+   EASY_VECTOR_15 (i.e. within -16..15) other than 0 and -1, so that it can
+   be synthesized with the ISA 2.07 instructions vspltisw and vupkhsw.  If
+   CONSTANT_PTR is non-null, the splatted value is returned through it.  */
+
+bool
+vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
+{
+  HOST_WIDE_INT value;
+  rtx elt;
+
+  if (!TARGET_P8_VECTOR)
+    return false;
+
+  if (mode != V2DImode)
+    return false;
+
+  if (!const_vec_duplicate_p (op, &elt))
+    return false;
+
+  value = INTVAL (elt);
+  if (value == 0 || value == -1  /* 0/-1: a single insn suffices.  */
+      || !EASY_VECTOR_15 (value))
+    return false;
+
+  if (constant_ptr)
+    *constant_ptr = (int) value;
+  return true;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7d845df5c2d..4919b073e50 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1174,6 +1174,30 @@  (define_insn_and_split "*xxspltib_<mode>_split"
   [(set_attr "type" "vecperm")
    (set_attr "length" "8")])

+(define_insn_and_split "*vspltisw_v2di_split"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+	(match_operand:V2DI 1 "vspltisw_vupkhsw_constant_split" "W"))]
+  "TARGET_P8_VECTOR && vspltisw_vupkhsw_constant_split (operands[1], V2DImode)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx tmp = can_create_pseudo_p ()
+	    ? gen_reg_rtx (V4SImode)
+	    : gen_lowpart (V4SImode, op0);  /* No pseudos: reuse op0.  */
+  int value;
+  /* The insn condition already accepted op1; this fetches the splat value.  */
+  vspltisw_vupkhsw_constant_p (op1, V2DImode, &value);
+  emit_insn (gen_altivec_vspltisw (tmp, GEN_INT (value)));
+  emit_insn (gen_altivec_vupkhsw_direct (op0, tmp));
+
+  DONE;
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "8")])
+

 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
 ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
diff --git a/gcc/testsuite/gcc.target/powerpc/pr104124.c b/gcc/testsuite/gcc.target/powerpc/pr104124.c
new file mode 100644
index 00000000000..30e3b6f86eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr104124.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power8 -mpower8-vector -O2" } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-final { scan-assembler {\mvspltisw\M} } } */
+/* { dg-final { scan-assembler {\mvupkhsw\M} } } */
+/* { dg-final { scan-assembler-not {\mlvx\M} } } */
+
+#include <altivec.h>
+
+/* PR target/104124: a small V2DI splat constant should be built with
+   vspltisw and vupkhsw rather than loaded from memory with lvx.  */
+
+vector unsigned long long
+foo (void)
+{
+  return vec_splats ((unsigned long long) 12);
+}