[V2,3/3] split complicate constant to memory

Message ID 20231115030237.1188073-3-guojiufu@linux.ibm.com
State Unresolved
Headers
Series [V2,1/3] rs6000: update num_insns_constant for 2 insns |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Jiufu Guo Nov. 15, 2023, 3:02 a.m. UTC
  Hi,

Sometimes, a complicated constant needs 3 (or more)
instructions to build. Generally speaking, this would not be
as fast as loading it from the constant pool (as discussed
in PR63281):
* "ld" is one instruction.  If we consider the "address/toc"
  adjustment, we may count it as 2 instructions (the high part
  of the address computation could be further optimized to a nop
  by the linker). And "pld" may need fewer cycles.
* In testing (SPEC2017), we get a better/more stable runtime
  if the threshold is set to "> 2" (compared with "> 3").

As tested on SPEC2017, among the visible performance changes, we
see a runtime improvement of about 1.8% on 500.perlbench_r
(-O2, P10) with the patch. As for the performance degradations
on other benchmarks, investigation shows that those regressions
are not caused by this patch.

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634197.html
This version updates the commit message.

Bootstrap & regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

	PR target/63281

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_const): Update to split
	complicated constants to memory.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/const_anchors.c: Update to test final-rtl.	
	* gcc.target/powerpc/parall_5insn_const.c: Update to keep original test
	point.
	* gcc.target/powerpc/pr106550.c: Likewise.
	* gcc.target/powerpc/pr106550_1.c: Likewise.
	* gcc.target/powerpc/pr87870.c: Update according to latest behavior.
	* gcc.target/powerpc/pr93012.c: Likewise.

---
 gcc/config/rs6000/rs6000.cc                     | 16 ++++++++++++++++
 .../gcc.target/powerpc/const_anchors.c          |  5 ++---
 .../gcc.target/powerpc/parall_5insn_const.c     | 14 ++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr106550.c     | 17 +++++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr106550_1.c   | 15 +++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr87870.c      |  5 ++++-
 gcc/testsuite/gcc.target/powerpc/pr93012.c      |  4 +++-
 7 files changed, 65 insertions(+), 11 deletions(-)
  

Patch

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b277c52687b..c878e1030ea 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10271,6 +10271,22 @@  rs6000_emit_set_const (rtx dest, rtx source)
 	  c = sext_hwi (c, 32);
 	  emit_move_insn (lo, GEN_INT (c));
 	}
+
+      /* If it can be stored to the constant pool and profitable.  */
+      else if (base_reg_operand (dest, mode)
+	       && num_insns_constant (source, mode) > 2)
+	{
+	  rtx sym = force_const_mem (mode, source);
+	  if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
+	      && use_toc_relative_ref (XEXP (sym, 0), mode))
+	    {
+	      rtx toc = create_TOC_reference (XEXP (sym, 0), copy_rtx (dest));
+	      sym = gen_const_mem (mode, toc);
+	      set_mem_alias_set (sym, get_TOC_alias_set ());
+	    }
+
+	  emit_insn (gen_rtx_SET (dest, sym));
+	}
       else
 	rs6000_emit_set_long_const (dest, c, NULL);
       break;
diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
index 542e2674b12..188744165f2 100644
--- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c
+++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target has_arch_ppc64 } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fdump-rtl-final" } */
 
 #define C1 0x2351847027482577ULL
 #define C2 0x2351847027482578ULL
@@ -16,5 +16,4 @@  void __attribute__ ((noinline)) foo1 (long long *a, long long b)
   if (b)
     *a++ = C2;
 }
-
-/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */
+/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
index e3a9a7264cf..df0690b90be 100644
--- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
+++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
@@ -9,8 +9,18 @@ 
 void __attribute__ ((noinline)) foo (unsigned long long *a)
 {
   /* 2 lis + 2 ori + 1 rldimi for each constant.  */
-  *a++ = 0x800aabcdc167fa16ULL;
-  *a++ = 0x7543a876867f616ULL;
+  {
+    register long long d asm("r0") = 0x800aabcdc167fa16ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+  {
+    register long long d asm("r0") = 0x7543a876867f616ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL};
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
index 74e395331ab..5eca2b2f701 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr106550.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
@@ -1,12 +1,25 @@ 
 /* PR target/106550 */
 /* { dg-options "-O2 -mdejagnu-cpu=power10" } */
 /* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target has_arch_ppc64 } */
 
 void
 foo (unsigned long long *a)
 {
-  *a++ = 0x020805006106003; /* pli+pli+rldimi */
-  *a++ = 0x2351847027482577;/* pli+pli+rldimi */  
+  {
+    /* pli+pli+rldimi */
+    register long long d asm("r0") = 0x020805006106003ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+  {
+    /* pli+pli+rldimi */  
+    register long long d asm("r0") = 0x2351847027482577ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 /* { dg-final { scan-assembler-times {\mpli\M} 4 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
index 7e709fcf9d8..11878d893a4 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
@@ -12,8 +12,19 @@  foo (unsigned long long *a)
   asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
   *a++ = n;
 
-  *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris */
-  *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori */
+  {
+    register long long d asm("r0") = 0x235a8470a7480000ULL; /* pli+sldi+oris */
+    long long n;
+    asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+
+  {
+    register long long d asm("r0") = 0x23a184700000b677ULL; /* pli+sldi+ori */
+    long long n;
+    asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 /* { dg-final { scan-assembler-times {\mpli\M} 3 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c
index d2108ac3386..5fee06744ae 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr87870.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c
@@ -25,4 +25,7 @@  test3 (void)
   return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad;
 }
 
-/* { dg-final { scan-assembler-not {\mld\M} } } */
+/* test3 using "ld" to load the value for r3 and r4.
+   test0, test1 and test2 are using "li".  */
+/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mli\M} 6 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
index a07ff764bbf..ef0f8fabcc6 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
@@ -11,4 +11,6 @@  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
 unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
 
 /* { dg-final { scan-assembler-times {\mpli\M} 4 { target has_arch_pwr10 }} } */
-/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 7 { target has_arch_pwr10 } } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 3 { target { ! has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mld\M} 4 { target { ! has_arch_pwr10 } } } } */